Data
We load and scale the data.
Code
data_clean <- read_sav ("~/Amber/Data/FINDEM/Final_survey.sav" )
data_scale <- data_clean %>% dplyr:: select (- Einnummer, - Cluster, - Date, - Timepoint, - Meetweek) %>%
mutate (Gender = factor (Gender) %>% fct_recode ("Male" = "1" , "Female" = "2" )) %>%
mutate (PICA = factor (PICA) %>% fct_recode ("Yes" = "1" , "No" = "0" )) %>%
mutate (RLS = factor (case_when (RLS == 2 ~ 1 , RLS == 1 ~ 0 )) %>% fct_recode ("Yes" = "1" , "No" = "0" )) %>%
mutate (Intervention_months_factor = factor (case_when (Intervention_months_s < 6 ~ 0 , Intervention_months_s >= 6 & Intervention_months_s < 12 ~ 1 , Intervention_months_s >= 12 & Intervention_months_s < 18 ~ 2 , Intervention_months_s >= 18 & Intervention_months_s < 24 ~ 3 ,Intervention_months_s >= 24 ~ 4 ))) %>%
mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" )) %>%
mutate (Warmglow = as.numeric (Warmglow)) %>% mutate (Warmglow = as.factor (case_when (Warmglow >= 6 ~ 1 , TRUE & ! is.na (Warmglow) ~ 0 )) %>% fct_recode ("Yes" = "1" , "No" = "0" ))
data_scale$ Age <- scale (data_scale$ Age, center= TRUE , scale = FALSE )
data_scale$ Weight <- scale (data_scale$ Weight, center= TRUE , scale = FALSE )
data_scale$ Height <- scale (data_scale$ Height, center= TRUE , scale = FALSE )
Descriptives
We present a descriptives table for the outcomes we will analyse, including:
PICA
Restless legs syndrome (RLS)
Cognitive functioning (CFQ)
Physical health (SF_ph)
Mental health (SF_mh)
Warm glow
Fatigue (CIS)
Code
#| label: t1 premenopausal females
data_t1 <- data_scale %>% mutate (stap2 = as.factor (stap), stap2 = recode_factor (stap2, "2.1" = "2" ,"2.2" = "2" , "3.1" = "3" , "3.2" = "3" ), stap2 = factor (stap2,levels = c ("1" , "2" , "3" , "4" )))
prefemalest1<- table1:: table1 (~ Age + Hb + Ferritin + factor (PICA) + factor (RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor (stap2), data = subset (data_t1, data_t1$ Gender== "Female" & data_t1$ PostMeno== 0 ), caption = "Premenopausal females" , render.continuous= c (.= "Mean (SD)" , .= "Median [Q1, Q3]" ), render.categorical= c (.= "FREQ (PCT)" ))
prefemalest1
Premenopausal females
1(N=656)
2(N=675)
3(N=657)
4(N=460)
Overall(N=2448)
Age
Mean (SD)
-13.3 (7.51)
-13.6 (7.75)
-13.7 (7.48)
-14.0 (7.72)
-13.6 (7.60)
Median [Q1, Q3]
-15.0 [-19.0, -8.02]
-16.0 [-20.0, -8.02]
-16.0 [-20.0, -8.02]
-16.0 [-20.0, -9.02]
-16.0 [-20.0, -8.02]
Hb
Mean (SD)
9.96 (38.7)
23.1 (120)
11.4 (54.6)
10.5 (46.2)
14.1 (74.7)
Median [Q1, Q3]
8.40 [8.00, 8.80]
8.40 [8.00, 8.70]
8.30 [8.10, 8.80]
8.40 [8.00, 8.70]
8.40 [8.00, 8.70]
Ferritin
Mean (SD)
33.2 (30.3)
30.3 (30.5)
30.2 (32.0)
28.5 (26.4)
30.7 (30.2)
Median [Q1, Q3]
24.2 [13.5, 43.6]
22.6 [12.0, 38.5]
20.6 [11.7, 39.3]
22.0 [11.1, 38.9]
22.7 [12.1, 39.9]
Missing
29 (4.4%)
47 (7.0%)
35 (5.3%)
41 (8.9%)
152 (6.2%)
factor(PICA)
No
607 (92.5)
635 (94.1)
623 (94.8)
427 (92.8)
2292 (93.6)
Yes
11 (1.7)
13 (1.9)
12 (1.8)
18 (3.9)
54 (2.2)
Missing
38 (5.8%)
27 (4.0%)
22 (3.3%)
15 (3.3%)
102 (4.2%)
factor(RLS)
No
528 (80.5)
549 (81.3)
515 (78.4)
378 (82.2)
1970 (80.5)
Yes
41 (6.3)
50 (7.4)
51 (7.8)
29 (6.3)
171 (7.0)
Missing
87 (13.3%)
76 (11.3%)
91 (13.9%)
53 (11.5%)
307 (12.5%)
CFQ_Total
Mean (SD)
31.7 (14.0)
29.4 (12.6)
31.3 (12.8)
32.1 (13.5)
31.0 (13.3)
Median [Q1, Q3]
30.0 [23.0, 40.0]
28.0 [21.0, 37.0]
30.0 [23.0, 39.0]
31.0 [23.0, 40.0]
30.0 [22.0, 39.0]
Missing
37 (5.6%)
30 (4.4%)
22 (3.3%)
16 (3.5%)
105 (4.3%)
SF_ph
Mean (SD)
89.5 (9.60)
90.3 (9.89)
89.4 (10.6)
89.7 (9.33)
89.7 (9.91)
Median [Q1, Q3]
92.4 [86.1, 96.2]
92.5 [87.5, 96.3]
92.5 [86.9, 96.2]
92.4 [86.2, 96.2]
92.5 [86.8, 96.3]
Missing
22 (3.4%)
8 (1.2%)
10 (1.5%)
9 (2.0%)
49 (2.0%)
SF_mh
Mean (SD)
76.6 (14.1)
76.6 (15.1)
76.4 (14.9)
74.4 (16.5)
76.1 (15.1)
Median [Q1, Q3]
81.3 [72.3, 86.3]
81.5 [72.2, 86.5]
81.4 [73.4, 86.0]
80.5 [68.9, 85.4]
81.3 [72.1, 86.3]
Missing
19 (2.9%)
7 (1.0%)
9 (1.4%)
8 (1.7%)
43 (1.8%)
Warmglow
No
251 (38.3)
262 (38.8)
282 (42.9)
223 (48.5)
1018 (41.6)
Yes
312 (47.6)
341 (50.5)
304 (46.3)
193 (42.0)
1150 (47.0)
Missing
93 (14.2%)
72 (10.7%)
71 (10.8%)
44 (9.6%)
280 (11.4%)
CIS_Totalmean
Mean (SD)
82.3 (6.54)
82.5 (6.50)
82.4 (6.18)
81.8 (6.23)
82.3 (6.38)
Median [Q1, Q3]
83.0 [78.0, 87.0]
83.0 [79.0, 87.0]
83.0 [78.0, 87.0]
83.0 [78.0, 86.0]
83.0 [78.0, 87.0]
Missing
35 (5.3%)
23 (3.4%)
18 (2.7%)
11 (2.4%)
87 (3.6%)
Code
postfemalest1<- table1:: table1 (~ Age + Hb + Ferritin + factor (PICA) + factor (RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor (stap2), data = subset (data_t1, data_t1$ Gender== "Female" & data_t1$ PostMeno== 1 ), caption = "Postmenopausal females" , render.continuous= c (.= "Mean (SD)" , .= "Median [Q1, Q3]" ), render.categorical= c (.= "FREQ (PCT)" ))
postfemalest1
Postmenopausal females
1(N=352)
2(N=379)
3(N=456)
4(N=304)
Overall(N=1491)
Age
Mean (SD)
13.5 (6.76)
13.9 (6.82)
13.6 (6.56)
13.8 (6.91)
13.7 (6.74)
Median [Q1, Q3]
13.0 [7.98, 19.0]
13.0 [7.98, 18.5]
13.0 [7.98, 19.0]
13.0 [7.98, 19.2]
13.0 [7.98, 19.0]
Hb
Mean (SD)
8.56 (0.535)
19.0 (101)
17.2 (92.5)
8.48 (0.494)
13.8 (72.4)
Median [Q1, Q3]
8.50 [8.20, 8.90]
8.40 [8.10, 8.80]
8.50 [8.10, 8.90]
8.40 [8.10, 8.80]
8.50 [8.10, 8.90]
Ferritin
Mean (SD)
35.7 (28.5)
33.7 (26.6)
35.0 (35.3)
34.3 (30.6)
34.7 (30.6)
Median [Q1, Q3]
29.5 [17.7, 43.1]
27.1 [16.5, 44.3]
25.4 [13.4, 43.7]
26.1 [14.8, 44.0]
27.0 [15.3, 44.0]
Missing
10 (2.8%)
19 (5.0%)
37 (8.1%)
15 (4.9%)
81 (5.4%)
factor(PICA)
No
338 (96.0)
365 (96.3)
442 (96.9)
296 (97.4)
1441 (96.6)
Yes
8 (2.3)
6 (1.6)
6 (1.3)
5 (1.6)
25 (1.7)
Missing
6 (1.7%)
8 (2.1%)
8 (1.8%)
3 (1.0%)
25 (1.7%)
factor(RLS)
No
277 (78.7)
305 (80.5)
339 (74.3)
233 (76.6)
1154 (77.4)
Yes
28 (8.0)
27 (7.1)
60 (13.2)
35 (11.5)
150 (10.1)
Missing
47 (13.4%)
47 (12.4%)
57 (12.5%)
36 (11.8%)
187 (12.5%)
CFQ_Total
Mean (SD)
26.5 (11.8)
26.4 (11.1)
25.6 (10.3)
26.4 (12.1)
26.2 (11.2)
Median [Q1, Q3]
26.0 [18.0, 34.0]
26.0 [18.0, 34.0]
25.0 [19.0, 33.0]
25.0 [17.0, 34.0]
25.0 [18.0, 34.0]
Missing
5 (1.4%)
6 (1.6%)
11 (2.4%)
4 (1.3%)
26 (1.7%)
SF_ph
Mean (SD)
88.4 (11.7)
87.6 (12.2)
88.9 (10.5)
88.9 (10.0)
88.5 (11.2)
Median [Q1, Q3]
91.3 [85.0, 96.3]
91.3 [84.3, 95.0]
91.8 [86.1, 95.0]
91.2 [86.3, 95.0]
91.3 [85.6, 95.0]
Missing
6 (1.7%)
6 (1.6%)
7 (1.5%)
3 (1.0%)
22 (1.5%)
SF_mh
Mean (SD)
81.4 (12.6)
81.1 (11.4)
81.3 (11.7)
80.5 (13.0)
81.1 (12.1)
Median [Q1, Q3]
85.0 [78.6, 88.8]
84.3 [77.9, 88.8]
84.5 [78.6, 88.5]
83.8 [78.3, 88.2]
84.3 [78.4, 88.5]
Missing
4 (1.1%)
3 (0.8%)
5 (1.1%)
2 (0.7%)
14 (0.9%)
Warmglow
No
124 (35.2)
127 (33.5)
140 (30.7)
103 (33.9)
494 (33.1)
Yes
209 (59.4)
223 (58.8)
290 (63.6)
188 (61.8)
910 (61.0)
Missing
19 (5.4%)
29 (7.7%)
26 (5.7%)
13 (4.3%)
87 (5.8%)
CIS_Totalmean
Mean (SD)
85.9 (6.97)
86.0 (6.57)
86.4 (6.47)
85.4 (5.87)
86.0 (6.50)
Median [Q1, Q3]
86.0 [83.0, 89.0]
86.0 [83.0, 89.0]
87.0 [83.0, 89.0]
86.0 [82.0, 89.0]
86.0 [83.0, 89.0]
Missing
4 (1.1%)
5 (1.3%)
8 (1.8%)
2 (0.7%)
19 (1.3%)
Code
malest1<- table1:: table1 (~ Age + Hb + Ferritin + factor (PICA) + factor (RLS) + CFQ_Total + SF_ph + SF_mh + Warmglow + CIS_Totalmean | factor (stap2), data = subset (data_t1, data_t1$ Gender== "Male" ), caption = "Males" , render.continuous= c (.= "Mean (SD)" , .= "Median [Q1, Q3]" ), render.categorical= c (.= "FREQ (PCT)" ))
malest1
Males
1(N=848)
2(N=925)
3(N=1073)
4(N=788)
Overall(N=3634)
Age
Mean (SD)
3.23 (14.8)
3.26 (15.2)
3.26 (15.2)
4.70 (15.5)
3.56 (15.2)
Median [Q1, Q3]
4.98 [-10.0, 16.0]
4.98 [-11.0, 17.0]
4.98 [-10.0, 16.0]
6.98 [-10.0, 18.0]
5.98 [-10.0, 17.0]
Hb
Mean (SD)
11.7 (48.0)
20.1 (102)
12.1 (52.3)
14.3 (70.4)
14.5 (71.4)
Median [Q1, Q3]
9.40 [8.90, 9.80]
9.30 [8.90, 9.80]
9.20 [8.80, 9.70]
9.20 [8.70, 9.70]
9.30 [8.90, 9.70]
Ferritin
Mean (SD)
58.5 (70.4)
49.7 (50.3)
44.0 (51.7)
44.7 (66.4)
49.0 (59.8)
Median [Q1, Q3]
39.3 [24.4, 68.7]
34.0 [18.9, 59.6]
27.8 [13.8, 51.9]
25.8 [12.6, 52.1]
32.0 [16.9, 57.7]
Missing
27 (3.2%)
44 (4.8%)
37 (3.4%)
60 (7.6%)
168 (4.6%)
factor(PICA)
No
811 (95.6)
881 (95.2)
1011 (94.2)
747 (94.8)
3450 (94.9)
Yes
22 (2.6)
24 (2.6)
27 (2.5)
22 (2.8)
95 (2.6)
Missing
15 (1.8%)
20 (2.2%)
35 (3.3%)
19 (2.4%)
89 (2.4%)
factor(RLS)
No
739 (87.1)
798 (86.3)
903 (84.2)
662 (84.0)
3102 (85.4)
Yes
44 (5.2)
48 (5.2)
61 (5.7)
61 (7.7)
214 (5.9)
Missing
65 (7.7%)
79 (8.5%)
109 (10.2%)
65 (8.2%)
318 (8.8%)
CFQ_Total
Mean (SD)
24.8 (11.9)
24.2 (11.4)
24.7 (11.4)
24.2 (11.4)
24.5 (11.5)
Median [Q1, Q3]
24.0 [16.0, 32.0]
24.0 [16.0, 31.0]
24.0 [17.0, 31.0]
24.0 [17.0, 31.0]
24.0 [17.0, 31.0]
Missing
19 (2.2%)
22 (2.4%)
32 (3.0%)
27 (3.4%)
100 (2.8%)
SF_ph
Mean (SD)
90.8 (8.95)
90.9 (9.13)
91.4 (7.69)
90.7 (9.34)
91.0 (8.73)
Median [Q1, Q3]
92.5 [88.7, 96.3]
93.7 [88.7, 96.3]
92.5 [88.8, 96.3]
92.5 [87.5, 96.3]
92.5 [88.7, 96.3]
Missing
12 (1.4%)
14 (1.5%)
15 (1.4%)
11 (1.4%)
52 (1.4%)
SF_mh
Mean (SD)
81.7 (12.1)
82.0 (12.1)
81.6 (12.7)
82.2 (11.6)
81.9 (12.2)
Median [Q1, Q3]
85.0 [79.3, 88.8]
85.3 [79.5, 89.3]
85.3 [79.5, 88.8]
85.3 [79.5, 89.3]
85.3 [79.5, 88.8]
Missing
8 (0.9%)
11 (1.2%)
11 (1.0%)
9 (1.1%)
39 (1.1%)
Warmglow
No
307 (36.2)
318 (34.4)
390 (36.3)
283 (35.9)
1298 (35.7)
Yes
478 (56.4)
552 (59.7)
609 (56.8)
449 (57.0)
2088 (57.5)
Missing
63 (7.4%)
55 (5.9%)
74 (6.9%)
56 (7.1%)
248 (6.8%)
CIS_Totalmean
Mean (SD)
85.2 (6.48)
85.4 (6.46)
85.3 (6.13)
85.8 (6.23)
85.4 (6.32)
Median [Q1, Q3]
86.0 [82.0, 88.0]
86.0 [82.0, 89.0]
86.0 [82.0, 89.0]
86.0 [83.0, 89.0]
86.0 [82.0, 89.0]
Missing
15 (1.8%)
17 (1.8%)
26 (2.4%)
18 (2.3%)
76 (2.1%)
Analysis
PICA
PICA is diagnosed when donor answers yes to: do you crave and regularly eat non-food items such as ice, clay, mud, sand, raw pasta, chalk or charcoal? (translated)
Models
Code
PICA_fitM <- glm (formula = PICA ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" ,family = binomial)
PICA_fit_preF <- glm (formula = PICA ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 ,family = binomial)
PICA_fit_postF <- glm (formula = PICA ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 ,family = binomial)
pl <- c (
` (Intercept) ` = "Intercept" ,
Intervention_months_factor0 = "0-5 months" ,
Intervention_months_factor1 = "6-11 months" ,
Intervention_months_factor2 = "12-17 months" ,
Intervention_months_factor3 = "18-23 months" ,
Intervention_months_factor4 = "24+ months"
)
tab_model (PICA_fit_preF, PICA_fit_postF, PICA_fitM, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "PICA" )
PICA
Pre-menopausal women
Post-menopausal women
Men
Predictors
Odds Ratios
CI
p
Odds Ratios
CI
p
Odds Ratios
CI
p
Intercept
0.01
0.00 – 0.02
<0.001
0.02
0.01 – 0.05
<0.001
0.03
0.02 – 0.04
<0.001
Age
0.93
0.88 – 0.97
0.001
0.99
0.93 – 1.05
0.694
1.00
0.98 – 1.01
0.645
Weight
1.03
1.00 – 1.05
0.016
1.03
0.99 – 1.06
0.128
1.02
1.01 – 1.04
0.007
Height
0.98
0.93 – 1.02
0.284
0.98
0.92 – 1.05
0.546
0.98
0.95 – 1.01
0.219
6-11 months
1.06
0.48 – 2.18
0.885
0.53
0.08 – 2.05
0.421
0.68
0.32 – 1.31
0.280
12-17 months
0.75
0.29 – 1.67
0.502
1.66
0.60 – 4.38
0.308
1.00
0.55 – 1.72
0.993
18-23 months
1.28
0.50 – 2.88
0.571
1.62
0.25 – 6.31
0.538
1.11
0.48 – 2.25
0.794
24+ months
0.80
0.29 – 1.86
0.629
1.93
0.52 – 5.87
0.275
0.91
0.47 – 1.65
0.766
Observations
2345
1465
3545
R2 Tjur
0.009
0.004
0.003
Model assumptions
Premenopausal females:
Code
plot (PICA_fit_preF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (PICA_fit_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames PICA Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3788 Yes -6.02 -11.2 -20.8 4 -4.38 2.96 2.97 0.00410
2 4453 Yes 1.98 -0.189 -13.8 4 -4.88 3.13 3.13 0.00275
3 7722 Yes -21.0 36.8 -1.76 3 -1.98 2.06 2.10 0.0395
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = PICA), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 4 influential data points
# A tibble: 4 × 13
.rowna…¹ PICA Age[,1] Weigh…² Heigh…³ Inter…⁴ .fitted .resid .std.…⁵ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3642 Yes -11.0 -23.2 -8.76 4 -4.61 3.04 3.04 0.00219
2 4028 Yes -0.0154 -10.2 -3.76 3 -4.76 3.09 3.09 0.00261
3 4453 Yes 1.98 -0.189 -13.8 4 -4.88 3.13 3.13 0.00275
4 7651 Yes 0.985 -20.2 -14.8 0 -5.07 3.19 3.19 0.00171
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²Weight[,1], ³Height[,1],
# ⁴Intervention_months_factor, ⁵.std.resid
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (PICA_fit_preF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (PICA_fit_preF,col= "red" ,id.n= 3 )
StudRes Hat CookD
3788 3.0187720 0.004100397 0.041247398
4453 3.1823811 0.002745063 0.045224283
5873 -0.6309422 0.073630863 0.002253238
7651 3.2299332 0.001706341 0.034185759
7722 2.1271122 0.039465735 0.038884139
Code
car:: influenceIndexPlot (PICA_fit_preF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 4453 , - 3788 , - 7722 ), ]
PICA_fit_preF_upd1 <- update (PICA_fit_preF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (PICA_fit_preF,PICA_fit_preF_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
2: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -4.838 -4.978
SE 0.449 0.480
Age -0.0762 -0.0873
SE 0.0226 0.0243
Weight 0.0261 0.0210
SE 0.0109 0.0117
Height -0.0249 -0.0110
SE 0.0232 0.0238
Intervention_months_factor1 0.0552 0.0624
SE 0.3830 0.3830
Intervention_months_factor2 -0.293 -0.284
SE 0.435 0.436
Intervention_months_factor3 0.2481 0.0957
SE 0.4382 0.4646
Intervention_months_factor4 -0.224 -0.628
SE 0.463 0.546
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6176 -0.2450 -0.2033 -0.1570 3.1874
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.83824 0.44894 -10.777 < 2e-16 ***
Age -0.07616 0.02259 -3.372 0.000746 ***
Weight 0.02613 0.01088 2.402 0.016300 *
Height -0.02488 0.02323 -1.071 0.284121
Intervention_months_factor1 0.05523 0.38299 0.144 0.885335
Intervention_months_factor2 -0.29262 0.43547 -0.672 0.501609
Intervention_months_factor3 0.24805 0.43821 0.566 0.571356
Intervention_months_factor4 -0.22381 0.46300 -0.483 0.628810
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 514.02 on 2344 degrees of freedom
Residual deviance: 496.47 on 2337 degrees of freedom
(685 observations deleted due to missingness)
AIC: 512.47
Number of Fisher Scoring iterations: 7
Code
summary (PICA_fit_preF_upd1)
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.5675 -0.2446 -0.1964 -0.1426 3.2650
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.97806 0.48015 -10.368 < 2e-16 ***
Age -0.08729 0.02435 -3.586 0.000336 ***
Weight 0.02095 0.01169 1.792 0.073148 .
Height -0.01095 0.02381 -0.460 0.645551
Intervention_months_factor1 0.06238 0.38298 0.163 0.870616
Intervention_months_factor2 -0.28433 0.43556 -0.653 0.513890
Intervention_months_factor3 0.09570 0.46465 0.206 0.836825
Intervention_months_factor4 -0.62845 0.54566 -1.152 0.249435
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 491.23 on 2341 degrees of freedom
Residual deviance: 472.08 on 2334 degrees of freedom
(685 observations deleted due to missingness)
AIC: 488.08
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (PICA_fit_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.056045 1 1.027641
Weight 1.163751 1 1.078773
Height 1.111582 1 1.054316
Intervention_months_factor 1.006976 4 1.000869
Postmenopausal females:
Code
plot (PICA_fit_postF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (PICA_fit_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames PICA Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1968 Yes 22.0 6.81 -6.76 1 -4.69 3.06 3.07 0.00540
2 3889 Yes 17.0 -19.2 -18.8 3 -3.92 2.81 2.83 0.0121
3 7354 Yes 25.0 -1.19 -3.76 1 -4.98 3.16 3.17 0.00450
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = PICA), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 4 influential data points
# A tibble: 3 × 13
.rownames PICA Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1968 Yes 22.0 6.81 -6.76 1 -4.69 3.06 3.07 0.00540
2 6560 Yes 27.0 -9.19 -7.76 0 -4.50 3.00 3.01 0.00312
3 7354 Yes 25.0 -1.19 -3.76 1 -4.98 3.16 3.17 0.00450
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (PICA_fit_postF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (PICA_fit_postF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1968 3.159646 0.005396830 0.0740710103
2897 -0.420213 0.060323738 0.0007615856
3889 2.916690 0.012066170 0.0782557928
4404 -0.259491 0.090758385 0.0004468314
7354 3.262409 0.004500247 0.0829501987
Code
car:: influenceIndexPlot (PICA_fit_postF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 1968 , - 3889 , - 7354 ), ]
PICA_fit_postF_upd1 <- update (PICA_fit_postF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (PICA_fit_postF,PICA_fit_postF_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 1)
2: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -4.099 -4.838
SE 0.539 0.449
Age -0.0121 -0.0762
SE 0.0308 0.0226
Weight 0.0250 0.0261
SE 0.0164 0.0109
Height -0.0201 -0.0249
SE 0.0333 0.0232
Intervention_months_factor1 -0.6280 0.0552
SE 0.7799 0.3830
Intervention_months_factor2 0.509 -0.293
SE 0.499 0.435
Intervention_months_factor3 0.483 0.248
SE 0.785 0.438
Intervention_months_factor4 0.655 -0.224
SE 0.600 0.463
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 1)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.4133 -0.2063 -0.1734 -0.1483 3.1595
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.09907 0.53920 -7.602 2.91e-14 ***
Age -0.01214 0.03081 -0.394 0.694
Weight 0.02500 0.01644 1.521 0.128
Height -0.02009 0.03328 -0.604 0.546
Intervention_months_factor1 -0.62798 0.77987 -0.805 0.421
Intervention_months_factor2 0.50858 0.49885 1.020 0.308
Intervention_months_factor3 0.48344 0.78492 0.616 0.538
Intervention_months_factor4 0.65540 0.60015 1.092 0.275
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 253.11 on 1464 degrees of freedom
Residual deviance: 247.37 on 1457 degrees of freedom
(608 observations deleted due to missingness)
AIC: 263.37
Number of Fisher Scoring iterations: 7
Code
summary (PICA_fit_postF_upd1)
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6176 -0.2450 -0.2033 -0.1570 3.1874
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.83824 0.44894 -10.777 < 2e-16 ***
Age -0.07616 0.02259 -3.372 0.000746 ***
Weight 0.02613 0.01088 2.402 0.016300 *
Height -0.02488 0.02323 -1.071 0.284121
Intervention_months_factor1 0.05523 0.38299 0.144 0.885335
Intervention_months_factor2 -0.29262 0.43547 -0.672 0.501609
Intervention_months_factor3 0.24805 0.43821 0.566 0.571356
Intervention_months_factor4 -0.22381 0.46300 -0.483 0.628810
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 514.02 on 2344 degrees of freedom
Residual deviance: 496.47 on 2337 degrees of freedom
(685 observations deleted due to missingness)
AIC: 512.47
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (PICA_fit_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.040166 1 1.019885
Weight 1.158062 1 1.076133
Height 1.185532 1 1.088821
Intervention_months_factor 1.011547 4 1.001436
Males:
Code
plot (PICA_fitM, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (PICA_fitM) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames PICA Age[,1] Weight…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 897 Yes -9.02 66.8 23.2 2 -2.54 2.29 2.31 0.0172
2 6725 Yes -11.0 51.8 -2.76 4 -2.46 2.25 2.28 0.0201
3 8024 Yes 24.0 8.81 -24.8 0 -3.00 2.47 2.48 0.0115
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = PICA), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 4 influential data points
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, PICA <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (PICA_fitM)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (PICA_fitM,col= "red" ,id.n= 3 )
StudRes Hat CookD
1294 -0.3973451 0.068791951 0.0007832811
5248 -0.4809799 0.032709351 0.0005258886
6621 2.9351986 0.002152348 0.0183783288
6725 2.3060317 0.020059249 0.0304778698
7465 2.9609268 0.002124400 0.0195002389
8024 2.5155578 0.011478062 0.0294584111
Code
car:: influenceIndexPlot (PICA_fitM, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 6621 , - 1294 , - 7465 ), ]
PICA_fitM_upd1 <- update (PICA_fitM, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (PICA_fitM,PICA_fitM_upd1) # no improvement
Calls:
1: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender == "Male")
2: glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -3.590 -4.838
SE 0.183 0.449
Age -0.00332 -0.07616
SE 0.00721 0.02259
Weight 0.02196 0.02613
SE 0.00816 0.01088
Height -0.0193 -0.0249
SE 0.0157 0.0232
Intervention_months_factor1 -0.3809 0.0552
SE 0.3526 0.3830
Intervention_months_factor2 -0.00243 -0.29262
SE 0.28825 0.43547
Intervention_months_factor3 0.102 0.248
SE 0.389 0.438
Intervention_months_factor4 -0.0948 -0.2238
SE 0.3190 0.4630
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Male")
Deviance Residuals:
Min 1Q Median 3Q Max
-0.4767 -0.2463 -0.2259 -0.2076 2.9345
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.589998 0.182792 -19.640 < 2e-16 ***
Age -0.003321 0.007205 -0.461 0.64490
Weight 0.021961 0.008161 2.691 0.00712 **
Height -0.019276 0.015696 -1.228 0.21943
Intervention_months_factor1 -0.380917 0.352632 -1.080 0.28005
Intervention_months_factor2 -0.002432 0.288246 -0.008 0.99327
Intervention_months_factor3 0.101603 0.388965 0.261 0.79393
Intervention_months_factor4 -0.094791 0.319002 -0.297 0.76635
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 875.12 on 3544 degrees of freedom
Residual deviance: 866.95 on 3537 degrees of freedom
(671 observations deleted due to missingness)
AIC: 882.95
Number of Fisher Scoring iterations: 6
Code
Call:
glm(formula = PICA ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6176 -0.2450 -0.2033 -0.1570 3.1874
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.83824 0.44894 -10.777 < 2e-16 ***
Age -0.07616 0.02259 -3.372 0.000746 ***
Weight 0.02613 0.01088 2.402 0.016300 *
Height -0.02488 0.02323 -1.071 0.284121
Intervention_months_factor1 0.05523 0.38299 0.144 0.885335
Intervention_months_factor2 -0.29262 0.43547 -0.672 0.501609
Intervention_months_factor3 0.24805 0.43821 0.566 0.571356
Intervention_months_factor4 -0.22381 0.46300 -0.483 0.628810
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 514.02 on 2344 degrees of freedom
Residual deviance: 496.47 on 2337 degrees of freedom
(685 observations deleted due to missingness)
AIC: 512.47
Number of Fisher Scoring iterations: 7
Code
#multicollinearity
car:: vif (PICA_fitM) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.064441 1 1.031718
Weight 1.215704 1 1.102590
Height 1.201809 1 1.096271
Intervention_months_factor 1.011403 4 1.001418
Restless legs syndrome
Restless legs syndrome (RLS) diagnose was based of Burchell & Alleen, 2008. Validation of the self-completed Cambridge-Hopkins questionnaire (CH-RLSq) for ascertainment of restless legs syndrome (RLS) in a population survey.
Predict by ferritin
Code
# Compute the analysis of variance
data_scale$ RLS <- as.numeric (data_scale$ RLS)
means <- round (aggregate (Ferritin ~ RLS, data_scale, mean), 1 )
res.aov <- aov (RLS ~ Fergroup, data = data_scale)
summary (res.aov)
Df Sum Sq Mean Sq F value Pr(>F)
Fergroup 2 0.1 0.05452 0.756 0.47
Residuals 6404 461.7 0.07210
1748 observations deleted due to missingness
Code
#fit model
fit1 <- lm (RLS ~ 1 + Fergroup, data = data_scale)
summary (fit1)
Call:
lm(formula = RLS ~ 1 + Fergroup, data = data_scale)
Residuals:
Min 1Q Median 3Q Max
-0.08338 -0.08338 -0.07836 -0.07216 0.92784
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.072165 0.006612 162.148 <2e-16 ***
FergroupFerritin 15-30 0.011214 0.009125 1.229 0.219
FergroupFerritin > 30 0.006200 0.008264 0.750 0.453
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.2685 on 6404 degrees of freedom
(1748 observations deleted due to missingness)
Multiple R-squared: 0.0002361, Adjusted R-squared: -7.613e-05
F-statistic: 0.7562 on 2 and 6404 DF, p-value: 0.4695
Code
data_scale <- data_scale %>% mutate (RLS = factor (case_when (RLS == 2 ~ 1 , RLS == 1 ~ 0 )) %>% fct_recode ("Yes" = "1" , "No" = "0" ))
# Barplot
data_scale_per2 <- data_scale %>%
group_by (Fergroup) %>%
count (RLS) %>%
mutate (
Percentage = round (proportions (n) * 100 , 1 ),
res = str_c (n, "(" , Percentage, ")%" )
)
ggplot (subset (data_scale_per2, ! is.na (RLS)), aes (Fergroup, Percentage, fill = RLS)) +
geom_col (position = "dodge" ) +
geom_text (aes (label = res), vjust = - 0.2 )+
theme_bw ()
Code
ggplot (data_scale , mapping = aes (x = Fergroup, fill = RLS)) +
geom_bar (position = "dodge" ) +
theme_bw ()
Code
# t-test + boxplot
res <- res <- t.test (Ferritin ~ RLS, data = data_scale, var.equal = TRUE )
res
Two Sample t-test
data: Ferritin by RLS
t = -0.68841, df = 6405, p-value = 0.4912
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-6.026412 2.893888
sample estimates:
mean in group No mean in group Yes
40.56889 42.13515
Code
ggplot (subset (data_scale, ! is.na (RLS)), aes (x= RLS, y= Ferritin)) +
geom_boxplot () +
theme_bw () +
geom_text (x = 2.3 , y = 700 , label= paste ('Means:' , " " , means$ Ferritin[1 ], " " , means$ Ferritin[2 ], " \n " , 't(' ,res$ parameter, '), p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "RLS" , y= "Ferritin" )
Models
Code
RLS_fitM <- glm (formula = RLS ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" ,family = binomial)
RLS_fit_preF <- glm (formula = RLS ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 ,family = binomial)
RLS_fit_postF <- glm (formula = RLS ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 ,family = binomial)
pl <- c (
` (Intercept) ` = "Intercept" ,
Intervention_months_factor0 = "0-5 months" ,
Intervention_months_factor1 = "6-11 months" ,
Intervention_months_factor2 = "12-17 months" ,
Intervention_months_factor3 = "18-23 months" ,
Intervention_months_factor4 = "24+ months"
)
tab_model (RLS_fit_preF, RLS_fit_postF, RLS_fitM, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Restless Legs Syndrome" )
Restless Legs Syndrome
Pre-menopausal women
Post-menopausal women
Men
Predictors
Odds Ratios
CI
p
Odds Ratios
CI
p
Odds Ratios
CI
p
Intercept
0.09
0.06 – 0.13
<0.001
0.08
0.05 – 0.13
<0.001
0.06
0.04 – 0.08
<0.001
Age
1.03
1.01 – 1.05
0.014
1.02
0.99 – 1.04
0.209
1.01
1.00 – 1.02
0.131
Weight
1.00
0.98 – 1.01
0.891
1.01
0.99 – 1.02
0.494
1.01
1.00 – 1.02
0.209
Height
1.00
0.98 – 1.03
0.881
0.99
0.96 – 1.02
0.444
1.00
0.97 – 1.02
0.701
6-11 months
1.58
0.99 – 2.49
0.052
2.20
1.42 – 3.41
<0.001
1.09
0.70 – 1.64
0.700
12-17 months
0.98
0.57 – 1.65
0.955
0.54
0.28 – 0.97
0.051
0.73
0.45 – 1.14
0.182
18-23 months
2.23
1.34 – 3.63
0.002
1.14
0.53 – 2.23
0.726
1.80
1.12 – 2.80
0.012
24+ months
2.27
1.46 – 3.50
<0.001
2.27
1.37 – 3.71
0.001
1.49
1.01 – 2.16
0.042
Observations
2140
1303
3316
R2 Tjur
0.013
0.026
0.005
Model assumptions
Premenopausal females:
Code
plot (RLS_fit_preF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (RLS_fit_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) #residuals not above 3, no further attention needed
# A tibble: 3 × 13
.rownames RLS Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 4031 Yes 0.985 36.8 1.24 3 -1.66 1.91 1.93 0.0179
2 6332 Yes -9.02 11.8 -16.8 2 -2.74 2.37 2.38 0.00604
3 7722 Yes -21.0 36.8 -1.76 3 -2.23 2.16 2.18 0.0138
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = RLS), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 )
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (RLS_fit_preF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (RLS_fit_preF,col= "red" ,id.n= 3 )
StudRes Hat CookD
821 2.5083683 0.001718628 0.0047014738
3945 -0.5792803 0.022894442 0.0005401043
4031 1.9384013 0.017930194 0.0121754551
6262 2.5072010 0.002864416 0.0077316809
7722 2.1909050 0.013832408 0.0165800093
Code
car:: influenceIndexPlot (RLS_fit_preF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 4453 , - 3788 , - 7722 ), ]
RLS_fit_preF_upd1 <- update (RLS_fit_preF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (RLS_fit_preF,RLS_fit_preF_upd1) # no improvement
Calls:
1: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
2: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -2.448 -2.439
SE 0.206 0.206
Age 0.0259 0.0278
SE 0.0105 0.0106
Weight -0.000999 -0.003117
SE 0.007310 0.007445
Height 0.00204 0.00228
SE 0.01359 0.01368
Intervention_months_factor1 0.457 0.457
SE 0.235 0.235
Intervention_months_factor2 -0.0151 -0.0166
SE 0.2694 0.2694
Intervention_months_factor3 0.801 0.762
SE 0.253 0.256
Intervention_months_factor4 0.818 0.827
SE 0.223 0.223
Code
Call:
glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6133 -0.4526 -0.3739 -0.3201 2.5009
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.448048 0.206469 -11.857 < 2e-16 ***
Age 0.025930 0.010544 2.459 0.013923 *
Weight -0.000999 0.007310 -0.137 0.891293
Height 0.002035 0.013588 0.150 0.880921
Intervention_months_factor1 0.456934 0.234656 1.947 0.051505 .
Intervention_months_factor2 -0.015128 0.269353 -0.056 0.955210
Intervention_months_factor3 0.800577 0.252922 3.165 0.001549 **
Intervention_months_factor4 0.817593 0.222996 3.666 0.000246 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1192.2 on 2139 degrees of freedom
Residual deviance: 1165.0 on 2132 degrees of freedom
(890 observations deleted due to missingness)
AIC: 1181
Number of Fisher Scoring iterations: 5
Code
summary (RLS_fit_preF_upd1)
Call:
glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6260 -0.4486 -0.3741 -0.3189 2.5137
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -2.438993 0.206326 -11.821 < 2e-16 ***
Age 0.027829 0.010576 2.631 0.008502 **
Weight -0.003117 0.007445 -0.419 0.675439
Height 0.002279 0.013679 0.167 0.867654
Intervention_months_factor1 0.456948 0.234694 1.947 0.051536 .
Intervention_months_factor2 -0.016589 0.269384 -0.062 0.950896
Intervention_months_factor3 0.761968 0.255774 2.979 0.002891 **
Intervention_months_factor4 0.826594 0.223089 3.705 0.000211 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1186.8 on 2136 degrees of freedom
Residual deviance: 1159.5 on 2129 degrees of freedom
(890 observations deleted due to missingness)
AIC: 1175.5
Number of Fisher Scoring iterations: 5
Code
#multicollinearity
car:: vif (RLS_fit_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.068334 1 1.033602
Weight 1.222045 1 1.105461
Height 1.153224 1 1.073883
Intervention_months_factor 1.004374 4 1.000546
Postmenopausal females:
Code
plot (RLS_fit_postF, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (RLS_fit_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames RLS Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1289 Yes 6.98 31.8 3.24 2 -2.91 2.44 2.45 0.00813
2 3919 Yes 5.98 9.81 9.24 3 -2.37 2.22 2.23 0.0141
3 4404 Yes 9.98 -16.2 -59.8 0 -1.80 1.97 2.05 0.0716
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = RLS), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 )
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (RLS_fit_postF)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (RLS_fit_postF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1032 -0.686794 0.044823103 0.001586396
1268 2.483515 0.004722464 0.011822875
1290 2.492310 0.004082840 0.010503083
3919 2.252302 0.014149332 0.019472978
4404 2.089048 0.071559171 0.062543565
Code
car:: influenceIndexPlot (RLS_fit_postF, id.n= 3 )
Code
# check without some outliers
data_scale2 <- data_scale[c (- 3919 , - 4404 ), ]
RLS_fit_postF_upd1 <- update (RLS_fit_postF, data = data_scale2, subset= data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
car:: compareCoefs (RLS_fit_postF,RLS_fit_postF_upd1) # no improvement
Calls:
1: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale, subset = data_scale$Gender ==
"Female" & data_scale$PostMeno == 1)
2: glm(formula = RLS ~ Age + Weight + Height + Intervention_months_factor,
family = binomial, data = data_scale2, subset = data_scale2$Gender ==
"Female" & data_scale2$PostMeno == 0)
Model 1 Model 2
(Intercept) -2.546 -2.448
SE 0.251 0.206
Age 0.0165 0.0259
SE 0.0131 0.0105
Weight 0.005569 -0.000999
SE 0.008147 0.007310
Height -0.01131 0.00204
SE 0.01476 0.01359
Intervention_months_factor1 0.790 0.457
SE 0.223 0.235
Intervention_months_factor2 -0.6212 -0.0151
SE 0.3183 0.2694
Intervention_months_factor3 0.127 0.801
SE 0.364 0.253
Intervention_months_factor4 0.822 0.818
SE 0.253 0.223
Code
#multicollinearity
car:: vif (RLS_fit_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.028736 1 1.014266
Weight 1.140586 1 1.067982
Height 1.149297 1 1.072053
Intervention_months_factor 1.021225 4 1.002629
Males:
Code
plot (RLS_fitM, which = 4 , id.n = 3 )
Code
#Inspect the largest:
model.data <- augment (RLS_fitM) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd)
# A tibble: 3 × 13
.rownames RLS Age[,1] Weigh…¹ Heigh…² Inter…³ .fitted .resid .std.…⁴ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 791 Yes 6.98 56.8 -1.76 0 -2.34 2.20 2.22 0.0107
2 2907 Yes -15.0 46.8 3.24 4 -2.20 2.15 2.16 0.00974
3 4049 Yes -22.0 56.8 5.24 1 -2.50 2.27 2.28 0.0115
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³Intervention_months_factor, ⁴.std.resid
Code
#Plot standardized residuals:
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = RLS), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # 0 influential data points
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, RLS <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
cooksd = cooks.distance (RLS_fitM)
plot (cooksd, main= "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# influence plot
car:: influencePlot (RLS_fitM,col= "red" ,id.n= 3 )
StudRes Hat CookD
791 2.2287712 0.010709142 0.0141342181
1294 -0.3135131 0.023596520 0.0001538936
1302 2.6051234 0.002345544 0.0081914267
3057 -0.5579593 0.022685414 0.0004933534
4049 2.3021371 0.011519861 0.0179545641
7085 2.6056903 0.003212045 0.0111192775
Code
car:: influenceIndexPlot (RLS_fitM, id.n= 3 )
Code
#multicollinearity
car:: vif (RLS_fitM) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.078175 1 1.038352
Weight 1.240648 1 1.113844
Height 1.242826 1 1.114821
Intervention_months_factor 1.009696 4 1.001207
Cognitive functioning
Scoring: https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/CFQ-form.pdf
Predict by ferritin
Code
res <- cor.test (data_scale$ CFQ_Total, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = CFQ_Total, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 75 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Cognitive functioning, higher is worse" , y= "Ferritin" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= CFQ_Total)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Cognitive functioning, higher is worse" )
Models
Code
CFQ_fitM <- lm (formula = CFQ_Total ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" )
CFQ_fit_preF <- lm (formula = CFQ_Total ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CFQ_fit_postF <- lm (formula = CFQ_Total ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
pl <- c (
` (Intercept) ` = "Intercept" ,
Intervention_months_factor0 = "0-5 months" ,
Intervention_months_factor1 = "6-11 months" ,
Intervention_months_factor2 = "12-17 months" ,
Intervention_months_factor3 = "18-23 months" ,
Intervention_months_factor4 = "24+ months"
)
tab_model (CFQ_fit_preF, CFQ_fit_postF, CFQ_fitM, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Cognitive functioning" )
Cognitive functioning
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
27.27
25.92 – 28.62
<0.001
28.03
26.52 – 29.54
<0.001
25.51
24.85 – 26.17
<0.001
Age
-0.25
-0.32 – -0.18
<0.001
-0.16
-0.25 – -0.08
<0.001
-0.14
-0.17 – -0.11
<0.001
Weight
0.01
-0.04 – 0.06
0.754
0.02
-0.04 – 0.07
0.570
-0.01
-0.04 – 0.02
0.569
Height
-0.09
-0.17 – 0.00
0.062
0.01
-0.09 – 0.11
0.801
-0.08
-0.14 – -0.02
0.007
6-11 months
0.05
-1.48 – 1.58
0.951
0.70
-0.94 – 2.34
0.402
0.25
-0.87 – 1.37
0.657
12-17 months
-0.97
-2.49 – 0.54
0.209
1.05
-0.49 – 2.59
0.181
0.77
-0.30 – 1.83
0.159
18-23 months
-0.68
-2.58 – 1.23
0.484
2.11
-0.30 – 4.53
0.087
-0.66
-2.15 – 0.82
0.381
24+ months
1.57
-0.07 – 3.21
0.060
1.04
-0.96 – 3.03
0.307
0.15
-0.99 – 1.29
0.795
Observations
2342
1464
3534
R2 / R2 adjusted
0.025 / 0.022
0.013 / 0.009
0.036 / 0.034
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity ok, constant variance meh (heteroscedasticity)
plot (CFQ_fit_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (CFQ_fit_preF, which = 3 ) # Linearity good, don't have constant variance
Code
# formal test in the form of Breusch-Pagan
bptest (CFQ_fit_preF) # small p-value, we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: CFQ_fit_preF
BP = 36.591, df = 7, p-value = 5.607e-06
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CFQ_fit_preF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CFQ_fit_preF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CFQ_fit_preF)
W = 0.98238, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (CFQ_fit_preF, which = 4 , id.n = 10 )
Code
# inspect the largest
model.data <- augment (CFQ_fit_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 275 60 -6.02 49.8 5.24 2 27.7 32.3 0.0122 13.1
2 4065 73 -17.0 35.8 -10.8 1 32.7 40.3 0.0111 13.1
3 7774 82 -22.0 -20.2 -6.76 3 32.5 49.5 0.00522 13.1
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CFQ_Total), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 19 × 13
.rown…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 671 78 -18.0 -17.2 -12.8 0 32.7 45.3 0.00170 13.1
2 1146 76 -17.0 -23.2 -4.76 1 31.8 44.2 0.00338 13.1
3 1366 87 -15.0 -11.2 -9.76 0 31.7 55.3 0.00120 13.1
4 1414 77 -23.0 -8.19 0.240 2 31.9 45.1 0.00363 13.1
5 1776 70 -16.0 8.81 3.24 2 30.1 39.9 0.00389 13.1
6 3168 72 -14.0 19.8 -0.760 2 30.0 42.0 0.00487 13.1
7 3514 86 -17.0 -15.2 -11.8 4 33.9 52.1 0.00359 13.1
8 3611 92 -18.0 -13.2 -8.76 4 33.9 58.1 0.00334 13.1
9 3662 83 -17.0 -23.2 -15.8 4 34.2 48.8 0.00450 13.1
10 3812 82 -17.0 14.8 -8.76 0 32.3 49.7 0.00354 13.1
11 4065 73 -17.0 35.8 -10.8 1 32.7 40.3 0.0111 13.1
12 4595 78 -16.0 6.81 1.24 0 31.2 46.8 0.00202 13.1
13 5244 92 -21.0 -20.2 -8.76 2 32.1 59.9 0.00325 13.1
14 5572 72 -1.02 -3.19 -1.76 4 29.2 42.8 0.00421 13.1
15 6447 75 -14.0 -8.19 6.24 0 30.1 44.9 0.00278 13.1
16 6780 74 -18.0 6.81 -10.8 4 34.3 39.7 0.00482 13.1
17 6859 72 -16.0 -18.2 -8.76 3 31.2 40.8 0.00481 13.1
18 7737 73 -19.0 -6.19 -2.76 3 31.5 41.5 0.00475 13.1
19 7774 82 -22.0 -20.2 -6.76 3 32.5 49.5 0.00522 13.1
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CFQ_fit_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# (log) transform to pass normality.
data_scale2 <- data_scale
data_scale2$ CFQ_Total <- log (data_scale2$ CFQ_Total)
table (data_scale2$ CFQ_Total)
-Inf 0 0.693147180559945 1.09861228866811
29 20 29 34
1.38629436111989 1.6094379124341 1.79175946922805 1.94591014905531
31 66 53 73
2.07944154167984 2.19722457733622 2.30258509299405 2.39789527279837
86 91 108 121
2.484906649788 2.56494935746154 2.63905732961526 2.70805020110221
147 143 150 180
2.77258872223978 2.83321334405622 2.89037175789616 2.94443897916644
193 199 229 246
2.99573227355399 3.04452243772342 3.09104245335832 3.13549421592915
230 266 225 290
3.17805383034795 3.2188758248682 3.25809653802148 3.29583686600433
262 293 273 269
3.3322045101752 3.36729582998647 3.40119738166216 3.43398720448515
265 279 252 236
3.46573590279973 3.49650756146648 3.52636052461616 3.55534806148941
190 218 205 175
3.58351893845611 3.61091791264422 3.63758615972639 3.66356164612965
176 162 130 111
3.68887945411394 3.71357206670431 3.73766961828337 3.76120011569356
130 113 84 105
3.78418963391826 3.80666248977032 3.8286413964891 3.85014760171006
60 76 62 59
3.87120101090789 3.89182029811063 3.91202300542815 3.93182563272433
50 44 45 35
3.95124371858143 3.97029191355212 3.98898404656427 4.00733318523247
25 36 21 18
4.02535169073515 4.04305126783455 4.06044301054642 4.07753744390572
19 17 19 18
4.0943445622221 4.11087386417331 4.12713438504509 4.14313472639153
15 17 8 9
4.15888308335967 4.17438726989564 4.18965474202643 4.20469261939097
5 3 9 7
4.21950770517611 4.23410650459726 4.24849524204936 4.26267987704132
5 2 3 4
4.27666611901606 4.29045944114839 4.30406509320417 4.31748811353631
4 2 1 1
4.33073334028633 4.34380542185368 4.35670882668959 4.36944785246702
1 2 2 1
4.39444915467244 4.40671924726425 4.4188406077966 4.45434729625351
1 2 1 1
4.46590811865458 4.52178857704904
1 2
Code
data_scale2[data_scale2 <= - Inf ] <- NA # Replace -Inf with NA
table (data_scale2$ CFQ_Total)
0 0.693147180559945 1.09861228866811 1.38629436111989
20 29 34 31
1.6094379124341 1.79175946922805 1.94591014905531 2.07944154167984
66 53 73 86
2.19722457733622 2.30258509299405 2.39789527279837 2.484906649788
91 108 121 147
2.56494935746154 2.63905732961526 2.70805020110221 2.77258872223978
143 150 180 193
2.83321334405622 2.89037175789616 2.94443897916644 2.99573227355399
199 229 246 230
3.04452243772342 3.09104245335832 3.13549421592915 3.17805383034795
266 225 290 262
3.2188758248682 3.25809653802148 3.29583686600433 3.3322045101752
293 273 269 265
3.36729582998647 3.40119738166216 3.43398720448515 3.46573590279973
279 252 236 190
3.49650756146648 3.52636052461616 3.55534806148941 3.58351893845611
218 205 175 176
3.61091791264422 3.63758615972639 3.66356164612965 3.68887945411394
162 130 111 130
3.71357206670431 3.73766961828337 3.76120011569356 3.78418963391826
113 84 105 60
3.80666248977032 3.8286413964891 3.85014760171006 3.87120101090789
76 62 59 50
3.89182029811063 3.91202300542815 3.93182563272433 3.95124371858143
44 45 35 25
3.97029191355212 3.98898404656427 4.00733318523247 4.02535169073515
36 21 18 19
4.04305126783455 4.06044301054642 4.07753744390572 4.0943445622221
17 19 18 15
4.11087386417331 4.12713438504509 4.14313472639153 4.15888308335967
17 8 9 5
4.17438726989564 4.18965474202643 4.20469261939097 4.21950770517611
3 9 7 5
4.23410650459726 4.24849524204936 4.26267987704132 4.27666611901606
2 3 4 4
4.29045944114839 4.30406509320417 4.31748811353631 4.33073334028633
2 1 1 1
4.34380542185368 4.35670882668959 4.36944785246702 4.39444915467244
2 2 1 1
4.40671924726425 4.4188406077966 4.45434729625351 4.46590811865458
2 1 1 1
4.52178857704904
2
Code
CFQFfactor_fix <- lm (CFQ_Total~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
qqnorm (resid (CFQFfactor_fix), col = "grey" )
qqline (resid (CFQFfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CFQFfactor_fix))
Shapiro-Wilk normality test
data: resid(CFQFfactor_fix)
W = 0.9088, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see below.
#multicollinearity
car:: vif (CFQ_fit_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.065997 1 1.032471
Weight 1.201466 1 1.096114
Height 1.134895 1 1.065314
Intervention_months_factor 1.003608 4 1.000450
Postmenopausal females:
Code
#### FEMALE post
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity good, constant variance looks good (heteroscedasticity)
plot (CFQ_fit_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CFQ_fit_postF, which = 3 ) # Linearity good, constant variance
Code
# formal test in the form of Breusch-Pagan
bptest (CFQ_fit_postF) # we fail to reject the null of homoscedasticity. The constant variance assumption is not violated.
studentized Breusch-Pagan test
data: CFQ_fit_postF
BP = 12.905, df = 7, p-value = 0.07445
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CFQ_fit_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CFQ_fit_postF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CFQ_fit_postF)
W = 0.99081, p-value = 6.045e-08
Code
### OUTLIERS
#plot 3largest
plot (CFQ_fit_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CFQ_fit_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3699 2 3.98 10.8 -3.76 3 29.6 -27.6 0.0139 11.2
2 3940 0 6.98 -19.2 -22.8 3 28.4 -28.4 0.0160 11.2
3 4433 79 3.98 -8.19 -12.8 4 28.1 50.9 0.00927 11.1
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CFQ_Total), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 4 × 13
.rowna…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 695 72 15.0 11.8 -6.76 0 25.7 46.3 0.00331 11.1
2 1869 61 11.0 7.81 -7.76 2 27.3 33.7 0.00459 11.2
3 4433 79 3.98 -8.19 -12.8 4 28.1 50.9 0.00927 11.1
4 4695 60 16.0 0.811 -4.76 2 26.5 33.5 0.00384 11.2
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CFQ_fit_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
CFQ_postF_factor_fix <- lm (CFQ_Total~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (CFQ_postF_factor_fix), col = "grey" )
qqline (resid (CFQ_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CFQ_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(CFQ_postF_factor_fix)
W = 0.93056, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CFQ_fit_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.035602 1 1.017645
Weight 1.153175 1 1.073860
Height 1.177418 1 1.085089
Intervention_months_factor 1.012313 4 1.001531
Males:
Code
### MALE
### LINEARITY (Lnearity and constant variance assumptions)
#Fitted versus Residuals Plot: Good
plot (CFQ_fitM, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CFQ_fitM, which = 3 ) # weird pattern below
Code
# formal test in the form of Breusch-Pagan
bptest (CFQ_fitM)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: CFQ_fitM
BP = 63.672, df = 7, p-value = 2.779e-11
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CFQ_fitM, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CFQ_fitM)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CFQ_fitM)
W = 0.98849, p-value = 2.327e-16
Code
### OUTLIERS
#plot 3largest
plot (CFQ_fitM, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CFQ_fitM) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 897 66 -9.02 66.8 23.2 2 25.0 41.0 0.00869 11.3
2 2736 65 -18.0 56.8 10.2 0 26.7 38.3 0.00731 11.3
3 5561 66 29.0 -7.19 -5.76 4 22.1 43.9 0.00388 11.3
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CFQ_Total, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CFQ_Total), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 22 × 13
.rown…¹ CFQ_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 309 59 5.98 11.8 1.24 0 24.5 34.5 9.22e-4 11.3
2 897 66 -9.02 66.8 23.2 2 25.0 41.0 8.69e-3 11.3
3 1214 61 15.0 13.8 5.24 1 23.1 37.9 2.20e-3 11.3
4 1266 59 6.98 1.81 12.2 2 24.3 34.7 2.17e-3 11.3
5 1730 81 4.98 -14.2 -3.76 0 25.3 55.7 1.61e-3 11.3
6 2505 61 11.0 -3.19 -2.76 3 23.6 37.4 4.47e-3 11.3
7 2736 65 -18.0 56.8 10.2 0 26.7 38.3 7.31e-3 11.3
8 2756 67 -24.0 -8.19 -11.8 0 29.9 37.1 3.94e-3 11.3
9 2879 68 -18.0 1.81 8.24 4 27.5 40.5 2.62e-3 11.3
10 3045 61 -18.0 11.8 18.2 4 26.6 34.4 3.14e-3 11.3
# … with 12 more rows, 3 more variables: .cooksd <dbl>, .std.resid <dbl>,
# index <int>, and abbreviated variable names ¹.rownames, ²CFQ_Total,
# ³Weight[,1], ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CFQ_fitM)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
CFQMfactor_fix <- lm (CFQ_Total~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Male" )
qqnorm (resid (CFQMfactor_fix), col = "grey" )
qqline (resid (CFQMfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CFQMfactor_fix))
Shapiro-Wilk normality test
data: resid(CFQMfactor_fix)
W = 0.89562, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CFQ_fitM) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.091034 1 1.044526
Weight 1.270075 1 1.126976
Height 1.250205 1 1.118126
Intervention_months_factor 1.007324 4 1.000913
We conclude that the assumptions were violated and thus we use robust models.
Robust models
Code
CFQ_preF_robust <- lmrob (CFQ_Total ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CFQ_postF_robust <- lmrob (CFQ_Total ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
CFQ_M_robust <- lmrob (CFQ_Total ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (CFQ_preF_robust, CFQ_postF_robust, CFQ_M_robust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Cognitive functioning, higher means more cognitive failure" , digits = 3 )
Cognitive functioning, higher means more cognitive failure
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
27.197
25.990 – 28.404
<0.001
27.331
25.745 – 28.918
<0.001
25.049
24.365 – 25.732
<0.001
Age
-0.210
-0.280 – -0.141
<0.001
-0.148
-0.240 – -0.056
0.002
-0.126
-0.153 – -0.098
<0.001
Weight
-0.005
-0.053 – 0.042
0.826
0.000
-0.053 – 0.053
0.997
-0.021
-0.059 – 0.016
0.269
Height
-0.073
-0.161 – 0.015
0.102
0.014
-0.087 – 0.114
0.791
-0.069
-0.129 – -0.008
0.026
6-12 months
0.144
-1.354 – 1.642
0.851
0.873
-0.767 – 2.512
0.297
0.047
-1.037 – 1.130
0.933
12-18 months
-1.458
-2.942 – 0.025
0.054
1.084
-0.502 – 2.670
0.180
0.807
-0.285 – 1.898
0.148
18-24 months
-0.924
-2.617 – 0.769
0.285
2.588
-0.055 – 5.231
0.055
-0.591
-2.138 – 0.955
0.454
24+ months
0.866
-0.823 – 2.555
0.315
0.859
-1.123 – 2.842
0.395
-0.238
-1.375 – 0.900
0.682
Observations
2342
1464
3534
R2 / R2 adjusted
0.022 / 0.019
0.012 / 0.007
0.033 / 0.031
Physical health
Scored by https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/SF-36-RAND-36-form.pdf
Predict by ferritin
Code
res <- cor.test (data_scale$ SF_ph, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = SF_ph, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 75 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Physical health (SF-36), higher is better" , y= "Ferritin" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= SF_ph)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Physical health (SF-36), higher is better" )
Models
Code
SF_ph_preF <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_ph_postF <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_ph_M <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_ph_preF, SF_ph_postF, SF_ph_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Physical health (Sf-36), higher score indicates better health" , show.reflvl = T, digits= 3 )
Physical health (Sf-36), higher score indicates better health
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
90.543
89.548 – 91.539
<0.001
88.301
86.818 – 89.785
<0.001
91.230
90.733 – 91.728
<0.001
Age
0.079
0.026 – 0.132
0.004
-0.025
-0.110 – 0.060
0.562
-0.031
-0.050 – -0.011
0.002
Weight
-0.154
-0.190 – -0.118
<0.001
-0.190
-0.242 – -0.137
<0.001
-0.113
-0.138 – -0.087
<0.001
Height
0.146
0.080 – 0.212
<0.001
0.048
-0.047 – 0.144
0.321
0.117
0.073 – 0.162
<0.001
6-12 months
-0.203
-1.329 – 0.922
0.723
0.435
-1.167 – 2.038
0.594
0.099
-0.744 – 0.942
0.818
12-18 months
0.329
-0.783 – 1.441
0.562
-1.258
-2.771 – 0.255
0.103
-0.538
-1.344 – 0.269
0.191
18-24 months
-0.479
-1.878 – 0.920
0.502
-1.913
-4.272 – 0.446
0.112
-0.088
-1.210 – 1.033
0.877
24+ months
-0.496
-1.703 – 0.712
0.421
0.985
-0.977 – 2.947
0.325
-0.015
-0.879 – 0.848
0.972
Observations
2398
1468
3582
R2 / R2 adjusted
0.031 / 0.028
0.039 / 0.035
0.031 / 0.029
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance meh (heteroscedasticity)
plot (SF_ph_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (SF_ph_preF, which = 3 ) # Linearity bad, constant variance meh
Code
# formal test in the form of Breusch-Pagan
bptest (SF_ph_preF) # small p-value, we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_ph_preF
BP = 25.899, df = 7, p-value = 0.0005251
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_ph_preF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_ph_preF)) #reject the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_ph_preF)
W = 0.79775, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_ph_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_ph_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_ph Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3703 31.9 -2.02 24.8 8.24 3 87.3 -55.4 0.00862 9.71
2 4034 21.9 -22.0 -7.19 -14.8 3 87.3 -65.4 0.00592 9.68
3 4065 13.8 -17.0 35.8 -10.8 1 81.9 -68.1 0.0109 9.67
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_ph), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 20 × 13
.rownames SF_ph Age[,1] Weigh…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 936 43.1 -8.02 9.81 -16.8 2 86.3 -43.2 0.00561 9.73
2 1407 23.8 -15.0 -17.2 -8.76 2 91.1 -67.3 0.00266 9.68
3 1659 28.1 -12.0 26.8 1.24 0 85.6 -57.5 0.00463 9.70
4 2032 34.4 -14.0 -11.2 -7.76 0 90.0 -55.6 0.00106 9.71
5 2111 41.2 -21.0 -5.19 3.24 0 90.2 -48.9 0.00226 9.72
6 2271 48.7 -21.0 -13.2 -7.76 0 89.8 -41.1 0.00141 9.74
7 2998 50.6 -22.0 -26.2 -11.8 4 90.6 -40.1 0.00443 9.74
8 3703 31.9 -2.02 24.8 8.24 3 87.3 -55.4 0.00862 9.71
9 3877 50.0 -19.0 -0.189 9.24 0 90.4 -40.4 0.00359 9.74
10 4034 21.9 -22.0 -7.19 -14.8 3 87.3 -65.4 0.00592 9.68
11 4065 13.8 -17.0 35.8 -10.8 1 81.9 -68.1 0.0109 9.67
12 4595 42.4 -16.0 6.81 1.24 0 88.4 -46.0 0.00199 9.73
13 5649 38.7 -1.02 -4.19 -18.8 0 88.4 -49.7 0.00427 9.72
14 6172 32.5 -21.0 4.81 -16.8 1 85.5 -53.0 0.00580 9.71
15 6286 52.5 -5.02 -10.2 -0.760 2 91.9 -39.5 0.00331 9.74
16 6414 45.0 -12.0 -20.2 -9.76 0 91.3 -46.3 0.00158 9.73
17 6859 47.5 -16.0 -18.2 -8.76 3 90.3 -42.9 0.00466 9.73
18 7026 42.6 -6.02 -13.2 -8.76 0 90.8 -48.2 0.00169 9.72
19 7246 44.4 -22.0 -13.2 -15.8 1 88.4 -43.9 0.00401 9.73
20 7301 43.2 -19.0 -18.2 -2.76 1 91.3 -48.1 0.00315 9.72
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_ph_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
data_scale2 <- data_scale
table (data_scale2$ SF_ph)
13.8010204081633 15.0510204081633 15.1020408163265 16.25
1 1 1 1
21.9132653061224 23.1122448979592 23.8010204081633 23.8520408163265
1 1 1 1
25.6122448979592 26.8622448979592 27.4744897959184 28.1122448979592
1 1 1 1
29.3622448979592 30.6122448979592 31.1734693877551 31.9132653061224
1 1 1 2
32.4744897959184 32.6020408163265 33.1632653061225 33.8010204081633
1 1 1 1
34.4132653061224 35.6122448979592 35.6632653061224 37.4234693877551
1 1 1 1
38.7244897959184 39.3622448979592 39.9744897959184 40.0255102040816
3 2 1 1
40.0510204081633 40.6122448979592 40.6632653061224 41.1734693877551
1 2 1 1
41.2244897959184 42.4234693877551 42.4744897959184 42.6020408163265
4 1 2 1
43.0867346938775 43.1632653061224 43.7244897959184 44.3367346938775
1 1 4 1
44.4132653061224 44.9744897959184 45.1020408163265 45.6122448979592
2 3 1 1
46.2244897959184 46.2755102040816 46.8367346938775 47.4744897959184
1 1 1 3
48.0357142857143 48.0867346938775 48.6734693877551 48.7244897959184
2 2 1 4
48.7755102040816 49.4132653061224 49.9744897959184 50.5357142857143
2 2 2 2
50.5867346938775 50.6122448979592 51.1734693877551 51.2244897959184
3 1 1 2
51.2755102040816 51.7857142857143 51.8367346938775 52.4744897959184
2 1 3 6
52.5255102040816 52.984693877551 53.0357142857143 53.0867346938775
1 1 3 5
53.1632653061224 53.7244897959184 53.7755102040816 54.3367346938775
1 3 1 4
54.3622448979592 54.3877551020408 54.4642857142857 54.9744897959184
2 1 1 1
55.0255102040816 55.5357142857143 55.5867346938775 55.6122448979592
2 2 6 1
55.6377551020408 55.6632653061224 55.765306122449 56.2244897959184
1 2 1 3
56.2755102040816 56.3010204081633 56.7857142857143 56.8367346938775
2 1 3 2
56.8877551020408 56.9132653061224 57.4234693877551 57.4744897959184
1 2 1 5
58.0357142857143 58.0867346938775 58.1122448979592 58.1377551020408
5 6 1 4
58.6989795918367 58.7244897959184 58.75 59.234693877551
1 4 2 1
59.2857142857143 59.3367346938775 59.3622448979592 59.3877551020408
3 4 1 1
59.8979591836735 59.9744897959184 60.5357142857143 60.5867346938775
2 4 2 5
60.6122448979592 60.6377551020408 61.0969387755102 61.1479591836735
1 4 2 1
61.2244897959184 61.25 61.7857142857143 61.8367346938775
3 1 2 8
61.8622448979592 61.8877551020408 62.4489795918367 62.4744897959184
1 3 1 1
63.0357142857143 63.0867346938775 63.1377551020408 63.1887755102041
6 7 3 1
63.2142857142857 63.6479591836735 63.6734693877551 63.6989795918367
1 1 1 2
63.7244897959184 63.75 64.234693877551 64.2857142857143
2 2 1 6
64.3367346938775 64.3877551020408 64.4132653061224 64.8469387755102
3 2 1 1
64.8979591836735 64.9744897959184 65 65.0255102040816
4 3 1 1
65.5357142857143 65.5867346938775 65.6377551020408 66.1479591836735
2 3 1 3
66.1989795918367 66.2244897959184 66.25 66.2755102040816
1 3 2 1
66.734693877551 66.7857142857143 66.8367346938775 66.8622448979592
1 4 8 1
66.8877551020408 67.3979591836735 67.4489795918367 67.4744897959184
2 5 2 3
67.5 68.0357142857143 68.0867346938775 68.1122448979592
2 1 9 1
68.1377551020408 68.5969387755102 68.6479591836735 68.6989795918367
2 4 1 7
68.7244897959184 68.75 69.234693877551 69.3367346938775
1 4 1 7
69.3877551020408 69.4387755102041 69.9489795918367 69.9744897959184
2 1 1 5
70.4336734693878 70.5357142857143 70.5867346938775 70.6377551020408
1 2 10 5
70.6632653061224 71.0969387755102 71.2244897959184 71.25
1 1 1 2
71.2755102040816 71.7857142857143 71.8367346938775 71.9132653061224
1 3 10 1
72.3979591836735 72.4489795918367 72.4744897959184 72.5
2 1 2 3
72.6275510204082 72.984693877551 73.0357142857143 73.0867346938775
1 1 10 20
73.1377551020408 73.1887755102041 73.2142857142857 73.5969387755102
7 1 1 1
73.6479591836735 73.6734693877551 73.6989795918367 73.7244897959184
6 1 3 2
73.75 73.7755102040816 73.8265306122449 74.234693877551
7 2 1 1
74.2857142857143 74.3367346938775 74.3877551020408 74.8469387755102
8 17 6 1
74.8979591836735 74.9489795918367 74.9744897959184 75
2 1 2 6
75.0255102040816 75.484693877551 75.5357142857143 75.5612244897959
1 1 10 1
75.5867346938775 75.6377551020408 75.6887755102041 75.9948979591837
18 12 1 1
76.0969387755102 76.1479591836735 76.1989795918367 76.2244897959184
1 2 7 3
76.25 76.2755102040816 76.6836734693878 76.7857142857143
8 2 1 5
76.8367346938775 76.8622448979592 76.8877551020408 76.9387755102041
17 1 13 2
76.9642857142857 77.2959183673469 77.3469387755102 77.3979591836735
1 1 2 7
77.4489795918367 77.4744897959184 77.5 78.0357142857143
3 1 9 10
78.0867346938775 78.1377551020408 78.5969387755102 78.6479591836735
33 15 1 6
78.6734693877551 78.6989795918367 78.7244897959184 78.75
1 9 3 15
78.7755102040816 79.2857142857143 79.3367346938775 79.3877551020408
1 14 31 13
79.8469387755102 79.8979591836735 79.9489795918367 79.9744897959184
1 5 10 1
80 80.0255102040816 80.0765306122449 80.484693877551
9 1 1 1
80.5357142857143 80.5867346938775 80.6377551020408 80.6632653061224
20 56 20 1
80.6887755102041 81.0969387755102 81.1479591836735 81.1989795918367
3 1 9 14
81.2244897959184 81.25 81.3265306122449 81.734693877551
2 16 1 1
81.7857142857143 81.8367346938775 81.8877551020408 81.9387755102041
17 53 24 2
82.3469387755102 82.3979591836735 82.4489795918367 82.5
1 15 20 17
82.5255102040816 82.984693877551 83.0357142857143 83.0867346938775
1 1 11 53
83.1377551020408 83.1887755102041 83.5969387755102 83.6479591836735
28 3 3 23
83.6989795918367 83.75 83.7755102040816 83.8265306122449
19 27 1 1
83.8775510204082 84.2857142857143 84.3367346938775 84.3877551020408
1 6 68 45
84.4387755102041 84.8469387755102 84.8979591836735 84.9489795918367
6 4 42 39
85 85.0255102040816 85.484693877551 85.5357142857143
36 1 1 5
85.5867346938775 85.6377551020408 85.6887755102041 86.0969387755102
60 47 1 5
86.1479591836735 86.1989795918367 86.2244897959184 86.25
40 64 1 55
86.734693877551 86.7857142857143 86.8367346938775 86.8877551020408
1 7 39 56
86.9387755102041 87.3469387755102 87.3979591836735 87.4489795918367
6 3 42 103
87.5 87.5255102040816 88.0357142857143 88.0867346938775
89 1 3 27
88.1377551020408 88.1887755102041 88.5969387755102 88.6479591836735
54 3 1 45
88.6989795918367 88.75 89.2857142857143 89.3367346938775
130 109 2 23
89.3877551020408 89.4387755102041 89.8469387755102 89.8979591836735
50 4 8 45
89.9489795918367 90 90.5867346938775 90.6377551020408
169 207 19 41
90.6887755102041 91.0969387755102 91.1479591836735 91.1989795918367
7 2 43 203
91.25 91.3775510204082 91.8367346938775 91.8877551020408
268 1 9 36
91.9387755102041 92.3469387755102 92.3979591836735 92.4489795918367
3 1 25 192
92.5 93.1377551020408 93.1887755102041 93.6479591836735
391 29 3 16
93.6989795918367 93.75 94.3877551020408 94.4387755102041
189 450 6 4
94.8979591836735 94.9489795918367 95 95.6887755102041
13 157 599 3
96.1989795918367 96.25 97.4489795918367 97.5
114 551 48 576
98.75 100
518 429
Code
data_scale2$ SF_ph <- log10 (data_scale$ SF_ph)
table (data_scale2$ SF_ph)
1.13991119808611 1.17756594462169 1.17903563970246 1.21085336531489
1 1 1 1
1.34070709681079 1.36384213065636 1.37659557672604 1.37752554385206
1 1 1 1
1.40844764578854 1.42914230416503 1.43892963627752 1.44889552749531
1 1 1 1
1.46778925660933 1.48589517902717 1.49378513888608 1.50397124267296
1 1 1 2
1.5115423366332 1.51324478680192 1.52065728528638 1.52892981125237
1 1 1 1
1.53672588265145 1.55159935126669 1.55222110438921 1.57314404682283
1 1 1 1
1.587985704539 1.59507985904269 1.60178292944813 1.60233687656648
3 2 1 1
1.60261358538878 1.60865699638119 1.60920225003964 1.61461746336559
1 2 1 1
1.61515528941811 1.62760618219906 1.62812817082188 1.62943040412713
4 1 2 1
1.63434358255055 1.63511429168255 1.64072475056672 1.64676370509219
1 1 4 1
1.64751270409687 1.65296624527886 1.6541961936566 1.65908144743944
2 3 1 1
1.66487212632034 1.66535121570362 1.67058660984477 1.67646030611031
1 1 1 3
1.68156425299621 1.68202528752135 1.68729230334762 1.68774730022727
2 2 1 4
1.68820182091962 1.69384355369865 1.69874836897428 1.70359840851809
2 2 2 2
1.70403664718485 1.7042556007977 1.70904486166394 1.70947764145252
3 1 1 2
1.70990999040003 1.71420997089276 1.71463763659142 1.71994822467427
2 1 3 6
1.72037027959757 1.72415042951464 1.72456842231101 1.72498601319117
1 1 3 5
1.72561164760703 1.73017229982901 1.73058453952005 1.73509353641828
1 3 1 4
1.73529738269374 1.73550113333408 1.73611181234059 1.74016120747629
2 1 1 1
1.74056407808209 1.74457236202064 1.7449711632258 1.74517042658415
2 2 6 1
1.74536959855824 1.74556867923187 1.74636409059323 1.74992552315929
1 2 1 3
1.75031944108371 1.7505162661412 1.75423909297823 1.75462911948123
2 1 3 2
1.7550187960277 1.75521350326338 1.75908942798006 1.75947512470337
1 2 1 5
1.76369533397267 1.76407696359469 1.76426765272262 1.76445825815992
5 6 1 4
1.76863055164819 1.76881925227332 1.76900787094377 1.7725761483821
1 4 2 1
1.77295005669784 1.77332364337197 1.77351031626627 1.77369690895739
3 4 1 1
1.77741202555512 1.77796656210448 1.78201167119688 1.78237754694043
2 4 2 5
1.7825603692887 1.78274311470772 1.78601945073012 1.7863819670132
1 4 2 1
1.78692517469115 1.78710609303657 1.79088807178658 1.79124654847379
3 1 2 8
1.79142567591783 1.7916047295101 1.79552534645307 1.79570271810426
1 3 1 1
1.79958667838162 1.79993804934084 1.80028913624913 1.80063993956538
6 7 3 1
1.80081523501959 1.80378448293895 1.80395851398993 1.80413247533089
1 1 1 2
1.80430636701766 1.80448018910599 1.80776965875139 1.80811447376109
2 2 1 6
1.80845901521661 1.80880328355164 1.80897531543422 1.81188947919753
3 2 1 1
1.81223103995592 1.81274287794316 1.81291335664286 1.81308376844881
4 3 1 1
1.81647803724589 1.8168160096224 1.81715371918989 1.82051644974889
2 3 1 3
1.82085129516402 1.82101862110787 1.82118588260885 1.82135307971655
1 3 2 1
1.82435167263177 1.82468357519428 1.82501522429929 1.82518095392614
1 4 8 1
1.82534662033361 1.82864674625805 1.82897538379315 1.82913960935075
2 5 2 3
1.82930377283102 1.83273694866942 1.83306250676705 1.83322519434412
2 1 9 1
1.83338782100092 1.83630473520284 1.8366276307433 1.83695028639105
2 4 1 7
1.83711152436651 1.8372727025023 1.84032377630326 1.84096338537602
1 4 1 7
1.84128283701374 1.84160205384686 1.84478138343304 1.84493974058407
2 1 1 5
1.84778033961881 1.84840906862026 1.84872309212049 1.84903688872512
1 2 10 5
1.84919370204399 1.85185090169285 1.85262934693067 1.85278486868055
1 1 1 2
1.85294033475771 1.85603802607827 1.85634658344962 1.85680900885115
1 3 10 1
1.859726324101 1.86003227302658 1.86018516670248 1.86033800657099
2 1 2 3
1.8611014001265 1.86323179078481 1.86353528100114 1.86383855928295
1 1 10 20
1.86414162592603 1.86444448122554 1.86459582971354 1.86685975047129
7 1 1 1
1.86716071686026 1.86731112187714 1.86746147482374 1.86761177573609
6 1 3 2
1.8677620246502 1.86791222160204 1.86821245976256 1.87060692196545
7 2 1 1
1.87090530362054 1.87120348041351 1.87150145262548 1.87417404248681
8 17 6 1
1.87446998422358 1.87476572443378 1.87491351905216 1.8750612633917
2 1 2 6
1.87520895748661 1.87785889814018 1.87815234036884 1.87829898716473
1 1 10 1
1.87844558445959 1.87873863067982 1.87903147929638 1.88078443619459
18 12 1 1
1.88136718634161 1.88165826844493 1.88194915558367 1.8820945261229
1 2 7 3
1.88223984801882 1.88238512130397 1.88470290923043 1.88528042857339
8 2 1 5
1.88556890050821 1.8857130646529 1.88585718095816 1.88614527017728
17 1 13 2
1.88628924315453 1.88815656148185 1.88844312993956 1.88872950943025
1 1 2 7
1.88901570020299 1.88915872489781 1.88930170250631 1.89229340996422
3 1 9 10
1.89257726257688 1.89286092978612 1.89540563129648 1.89568745770605
33 15 1 6
1.89582830235846 1.8959691013488 1.89610985470667 1.89625056246164
1 9 3 15
1.89639122464324 1.89919494310842 1.89947432200638 1.89975352129719
1 14 31 13
1.90225827052599 1.90253568636545 1.90281292511211 1.90295147814628
1 5 10 1
1.90308998699194 1.90322845167729 1.90350524867959 1.9057132965597
9 1 1 1
1.90598851487176 1.90626355888469 1.90653842881912 1.90667579857573
20 56 20 1
1.90681312489527 1.90900446089432 1.90927760208691 1.90955057160055
3 1 9 14
1.90968699204517 1.90982336965091 1.91023224570362 1.91240644039174
2 16 1 1
1.91267745099767 1.91294829259167 1.91321896538441 1.91348946958619
17 53 24 2
1.91564745902958 1.91591645531065 1.91618528508209 1.91645394854993
1 15 20 17
1.91658821798426 1.9189979962614 1.91926492588375 1.91953169154442
1 1 11 53
1.91979829344469 1.9200647317855 1.92219037436192 1.92245534964891
28 3 3 23
1.92272016336559 1.92298481570888 1.92311708142695 1.92338149207859
19 27 1 1
1.92364574184756 1.92575397162789 1.92601678221497 1.92627943386005
1 6 68 45
1.92654192675526 1.92863617786304 1.92889725059823 1.92915816648586
6 4 42 39
1.92941892571429 1.92954924664007 1.93188836081481 1.93214748640836
36 1 1 5
1.93240645748455 1.93266527422756 1.93292393682121 1.93498771014659
60 47 1 5
1.93524499361495 1.93550212475444 1.9356306332572 1.93575910374531
40 64 1 55
1.9381928500218 1.93844824225609 1.93870348439209 1.93895857660613
1 7 39 56
1.93921351907421 1.94124768898466 1.94150129160903 1.9417547462307
6 3 42 103
1.94200805302231 1.94213465103572 1.94465889227103 1.944910511329
89 1 3 27
1.94516198468976 1.94541331252195 1.94741871629031 1.94766874190568
54 3 1 45
1.9479186236628 1.94816836172713 1.95078197732982 1.95103007472697
130 109 2 23
1.95127803047559 1.95152584473732 1.9535032846108 1.95374983271955
50 4 8 45
1.95399624094285 1.95424250943932 1.95706460528116 1.95730914046887
169 207 19 41
1.95755353804533 1.95950378317232 1.95974694918198 1.95998997911664
7 2 43 203
1.96023287312851 1.96083951449256 1.96301643374683 1.96325764146306
268 1 9 36
1.96349871528657 1.96542250351271 1.96566237895758 1.96590212198432
3 1 25 192
1.96614173273903 1.96912576592927 1.96936360519146 1.97149831748353
391 29 3 16
1.97173486132484 1.97197127639976 1.97491565704654 1.97515034739643
189 450 6 4
1.97725687286144 1.97749030177429 1.97772360528885 1.98086099713027
13 157 599 3
1.98317046538516 1.98340073818054 1.98877729589125 1.98900461569854
114 551 48 576
1.9945371042985 2
518 429
Code
SF_ph_preF_factor_fix <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
qqnorm (resid (SF_ph_preF_factor_fix), col = "grey" )
qqline (resid (SF_ph_preF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_ph_preF_factor_fix))
Shapiro-Wilk normality test
data: resid(SF_ph_preF_factor_fix)
W = 0.67074, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_ph_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.065418 1 1.032191
Weight 1.206128 1 1.098239
Height 1.140722 1 1.068046
as.factor(Intervention_months_factor) 1.004341 4 1.000542
Postmenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance looks bad (heteroscedasticity)
plot (SF_ph_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_ph_postF, which = 3 ) # Linearity bad, constant variance bad
Code
# formal test in the form of Breusch-Pagan
bptest (SF_ph_postF) # reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_ph_postF
BP = 24.922, df = 7, p-value = 0.0007833
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_ph_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_ph_postF)) #reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_ph_postF)
W = 0.80407, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_ph_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_ph_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_ph Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 4246 16.2 14.0 11.8 7.24 4 87.0 -70.8 0.0107 10.8
2 4440 29.4 4.98 8.81 4.24 4 87.7 -58.3 0.00984 10.9
3 6837 32.6 6.98 -14.2 -13.8 3 88.2 -55.6 0.0123 10.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_ph), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 14 × 13
.rownames SF_ph Age[,1] Weigh…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 289 33.2 2.98 12.8 1.24 2 84.6 -51.4 0.00668 10.9
2 1423 40.7 4.98 -13.2 -6.76 2 89.1 -48.4 0.00496 10.9
3 2400 41.2 23.0 -8.19 -0.760 0 89.2 -48.0 0.00412 10.9
4 2408 27.5 17.0 6.81 -1.76 0 86.5 -59.0 0.00279 10.9
5 2658 38.7 28.0 -9.19 -19.8 0 88.4 -49.7 0.00639 10.9
6 3087 38.7 12.0 -7.19 -6.76 2 87.8 -49.1 0.00354 10.9
7 3814 23.9 6.98 9.81 -2.76 0 86.1 -62.3 0.00328 10.9
8 4246 16.2 14.0 11.8 7.24 4 87.0 -70.8 0.0107 10.8
9 4440 29.4 4.98 8.81 4.24 4 87.7 -58.3 0.00984 10.9
10 5472 35.6 11.0 14.8 7.24 0 85.6 -50.0 0.00585 10.9
11 5626 42.5 6.98 -1.19 -2.76 0 88.2 -45.7 0.00236 10.9
12 5887 40.0 9.98 17.8 2.24 1 85.2 -45.2 0.00693 10.9
13 6837 32.6 6.98 -14.2 -13.8 3 88.2 -55.6 0.0123 10.9
14 6898 41.2 15.0 -8.19 -10.8 3 87.0 -45.9 0.0107 10.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_ph_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
SF_ph_postF_factor_fix <- lm (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (SF_ph_postF_factor_fix), col = "grey" )
qqline (resid (SF_ph_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_ph_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(SF_ph_postF_factor_fix)
W = 0.68121, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_ph_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.033504 1 1.016614
Weight 1.150857 1 1.072780
Height 1.172847 1 1.082981
as.factor(Intervention_months_factor) 1.012156 4 1.001511
Males:
Code
### LINEARITY (Lnearity and constant variance assumptions)
#Fitted versus Residuals Plot: Bad
plot (SF_ph_M, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_ph_M, which = 3 ) # suspect
Code
# formal test in the form of Breusch-Pagan
bptest (SF_ph_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_ph_M
BP = 33.594, df = 7, p-value = 2.052e-05
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_ph_M, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_ph_M)) # reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_ph_M)
W = 0.77114, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_ph_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_ph_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_ph Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1657 15.1 20.0 1.81 -4.76 0 89.9 -74.7 0.00149 8.51
2 3057 15.1 13.0 71.8 2.24 4 83.0 -67.9 0.0117 8.53
3 3062 23.1 19.0 23.8 -11.8 2 86.0 -62.9 0.00532 8.54
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_ph), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 84 × 13
.rownames SF_ph Age[,1] Weigh…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 407 56.8 25.0 -3.19 3.24 2 90.7 -33.9 0.00263 8.59
2 437 40.6 -14.0 -3.19 12.2 2 92.9 -52.3 0.00252 8.56
3 440 49.4 13.0 -7.19 -3.76 2 90.7 -41.2 0.00256 8.58
4 581 63.1 -12.0 -7.19 1.24 0 92.6 -29.5 0.00124 8.59
5 635 47.5 20.0 9.81 1.24 0 89.7 -42.2 0.00103 8.58
6 662 40.0 24.0 16.8 1.24 0 88.7 -48.7 0.00138 8.57
7 755 26.9 -20.0 7.81 6.24 0 91.7 -64.8 0.00140 8.54
8 759 44.4 12.0 -6.19 3.24 0 91.9 -47.5 0.00103 8.57
9 823 58.0 16.0 46.8 7.24 0 86.3 -28.3 0.00388 8.59
10 825 59.3 27.0 -8.19 -9.76 0 90.2 -30.9 0.00258 8.59
# … with 74 more rows, 3 more variables: .cooksd <dbl>, .std.resid <dbl>,
# index <int>, and abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_ph_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
SF_ph_Mfactor_fix <- lm (SF_ph~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (SF_ph_Mfactor_fix), col = "grey" )
qqline (resid (SF_ph_Mfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_ph_Mfactor_fix))
Shapiro-Wilk normality test
data: resid(SF_ph_Mfactor_fix)
W = 0.68121, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_ph_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.092442 1 1.045200
Weight 1.274162 1 1.128788
Height 1.251161 1 1.118553
as.factor(Intervention_months_factor) 1.007564 4 1.000942
We conclude that the assumptions were violated and thus we use robust models.
Robust models
Code
SF_ph_preF_robust <- lmrob (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_ph_postF_robust <- lmrob (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_ph_Mrobust <- lmrob (SF_ph ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_ph_preF_robust, SF_ph_postF_robust, SF_ph_Mrobust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), show.reflvl = T, title = "Physical health (Sf-36), higher score indicates better health (robust regression)" , digits = 3 )
Physical health (Sf-36), higher score indicates better health (robust regression)
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
92.473
91.820 – 93.126
<0.001
91.859
90.882 – 92.836
<0.001
93.180
92.842 – 93.519
<0.001
Age
0.056
0.021 – 0.090
0.001
-0.053
-0.106 – -0.000
0.049
-0.026
-0.038 – -0.013
<0.001
Weight
-0.098
-0.125 – -0.071
<0.001
-0.107
-0.140 – -0.073
<0.001
-0.067
-0.084 – -0.049
<0.001
Height
0.067
0.023 – 0.111
0.003
0.036
-0.023 – 0.095
0.228
0.056
0.026 – 0.086
<0.001
6-12 months
-0.042
-0.760 – 0.676
0.909
-0.331
-1.320 – 0.659
0.512
-0.425
-0.975 – 0.124
0.129
12-18 months
0.324
-0.391 – 1.038
0.375
-1.117
-2.143 – -0.092
0.033
-0.466
-1.001 – 0.069
0.088
18-24 months
0.106
-0.788 – 1.001
0.816
-1.014
-2.572 – 0.543
0.201
-0.352
-1.069 – 0.365
0.336
24+ months
-0.640
-1.471 – 0.192
0.131
0.777
-0.429 – 1.983
0.207
-0.119
-0.640 – 0.402
0.653
Observations
2398
1468
3582
R2 / R2 adjusted
0.032 / 0.029
0.040 / 0.035
0.032 / 0.030
Mental health
Coded using https://meetinstrumentenzorg.nl/wp-content/uploads/instrumenten/SF-36-RAND-36-form.pdf
Predict by ferritin
Code
res <- cor.test (data_scale$ SF_mh, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = SF_ph, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 75 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Mental health (SF-36), higher is better" , y= "Ferritin" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= SF_mh)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Mental health (SF-36), higher is better" )
Models
Code
SF_mh_preF<- lm (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_mh_postF <- lm (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_mh_M <- lm (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_mh_preF, SF_mh_postF, SF_mh_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Mental health (Sf-36), higher score indicates better health" , show.reflvl = T, digits= 3 )
Mental health (Sf-36), higher score indicates better health
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
80.147
78.628 – 81.666
<0.001
77.548
75.935 – 79.161
<0.001
81.271
80.582 – 81.961
<0.001
Age
0.266
0.185 – 0.348
<0.001
0.268
0.176 – 0.361
<0.001
0.167
0.140 – 0.194
<0.001
Weight
-0.054
-0.109 – 0.001
0.054
-0.087
-0.144 – -0.031
0.003
-0.026
-0.061 – 0.009
0.142
Height
0.094
-0.007 – 0.194
0.068
0.102
-0.002 – 0.205
0.055
0.075
0.013 – 0.136
0.017
6-12 months
-1.196
-2.917 – 0.525
0.173
0.540
-1.203 – 2.283
0.543
-0.525
-1.694 – 0.644
0.378
12-18 months
1.321
-0.375 – 3.017
0.127
0.315
-1.330 – 1.960
0.707
-0.683
-1.799 – 0.433
0.230
18-24 months
-2.222
-4.358 – -0.087
0.041
-0.597
-3.168 – 1.974
0.649
-0.839
-2.392 – 0.713
0.289
24+ months
-0.744
-2.590 – 1.102
0.429
0.322
-1.804 – 2.449
0.766
-0.556
-1.750 – 0.638
0.361
Observations
2404
1476
3595
R2 / R2 adjusted
0.023 / 0.020
0.028 / 0.024
0.042 / 0.040
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance meh (heteroscedasticity)
plot (SF_mh_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (SF_mh_preF, which = 3 ) # Linearity bad, constant variance meh
Code
# formal test in the form of Breusch-Pagan
bptest (SF_mh_preF) # reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_mh_preF
BP = 26.608, df = 7, p-value = 0.0003921
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_mh_preF , 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_mh_preF)) #reject the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_mh_preF)
W = 0.82854, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_mh_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_mh_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3925 18.2 -2.02 6.81 7.24 3 77.7 -59.4 0.00698 14.9
2 4065 11.2 -17.0 35.8 -10.8 1 71.5 -60.2 0.0109 14.9
3 7349 33.4 -13.0 49.8 -0.760 1 72.7 -39.3 0.0130 14.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_mh), almha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 5 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1659 15.6 -12.0 26.8 1.24 0 75.6 -60.0 0.00459 14.9
2 2029 11.9 -23.0 -13.2 -1.76 0 74.6 -62.7 0.00187 14.9
3 4065 11.2 -17.0 35.8 -10.8 1 71.5 -60.2 0.0109 14.9
4 4595 14.9 -16.0 6.81 1.24 0 75.6 -60.8 0.00197 14.9
5 6838 8.38 -20.0 -11.2 -1.76 3 73.0 -64.7 0.00478 14.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_mh_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
data_scale2 <- data_scale
table (data_scale2$ SF_mh)
3 8.375 11.25 11.875
1 1 1 1
12.5 13.75 14.875 15.25
1 1 1 1
15.625 16.25 16.875 17.125
1 1 1 2
17.25 17.5 18.25 18.375
2 1 1 1
19.25 19.5 20 20.5
2 1 1 2
20.625 21.125 22.25 22.3333333333333
1 1 1 1
22.625 23 24.125 24.25
3 1 1 1
24.5 24.625 24.75 24.875
1 1 1 1
25.5 25.625 25.75 25.875
2 1 3 1
25.9583333333333 26.25 26.375 26.5
1 2 2 2
26.75 27.125 27.25 27.625
1 2 2 2
27.75 28 28.2083333333333 28.25
2 2 1 1
28.3333333333333 28.375 28.5 28.625
1 1 2 2
29.25 29.625 29.7083333333333 29.9583333333333
1 2 1 2
30.125 30.2083333333333 30.25 30.5
2 1 1 2
30.75 31 31.125 31.25
1 2 1 2
31.5 31.625 31.875 32.125
3 1 4 3
32.25 32.3333333333333 32.375 32.5
2 1 1 1
32.75 32.875 33.125 33.375
3 1 2 2
33.5 33.5833333333333 33.625 33.7083333333333
3 1 2 1
33.75 33.875 33.9583333333333 34
4 6 1 1
34.125 34.25 34.375 34.5
1 1 5 1
34.625 34.7083333333333 34.75 34.875
2 2 1 1
35 35.125 35.2083333333333 35.25
3 4 1 2
35.3333333333333 35.375 35.5 35.625
1 2 2 1
35.75 36.0833333333333 36.125 36.2083333333333
1 1 2 1
36.25 36.375 36.625 36.75
2 1 7 1
36.875 36.9583333333333 37.0833333333333 37.125
2 1 1 1
37.4583333333333 37.5 37.625 37.875
1 1 4 1
37.9583333333333 38.25 38.375 38.4583333333333
1 1 2 4
38.5 38.5833333333333 38.625 38.75
2 1 1 1
38.8333333333333 38.875 38.9583333333333 39
1 2 1 1
39.125 39.2083333333333 39.25 39.375
1 1 1 1
39.5 39.5833333333333 39.625 39.75
2 1 2 2
39.875 40.0833333333333 40.125 40.2083333333333
1 1 1 1
40.3333333333333 40.375 40.5 40.5833333333333
2 1 1 1
40.625 40.7083333333333 40.75 40.875
1 1 3 2
41 41.125 41.1666666666667 41.2083333333333
2 3 1 1
41.25 41.4583333333333 41.5 41.5833333333333
1 1 1 1
41.7083333333333 41.75 41.8333333333333 41.9583333333333
3 2 2 1
42 42.0833333333333 42.125 42.2083333333333
1 3 3 2
42.25 42.3333333333333 42.4583333333333 42.5
1 1 3 1
42.7083333333333 42.75 42.875 42.9583333333333
1 9 4 1
43 43.125 43.25 43.3333333333333
1 2 2 1
43.375 43.5 43.625 43.7083333333333
2 2 1 1
43.75 43.9583333333333 44 44.125
4 2 3 2
44.1666666666667 44.2083333333333 44.25 44.3333333333333
1 2 1 2
44.375 44.4583333333333 44.625 44.7083333333333
2 2 1 1
44.8333333333333 45.2083333333333 45.25 45.3333333333333
1 1 2 1
45.375 45.4583333333333 45.5416666666667 45.5833333333333
4 1 1 3
45.7083333333333 45.875 46.5 46.5416666666667
2 1 2 1
46.5833333333333 46.625 46.7083333333333 46.75
3 1 2 2
46.8333333333333 46.9583333333333 47.0416666666667 47.125
3 1 1 1
47.2083333333333 47.25 47.4583333333333 47.5
1 2 2 3
47.5833333333333 47.6666666666667 47.7083333333333 47.8333333333333
1 1 1 2
47.875 47.9583333333333 48 48.125
3 1 2 1
48.2083333333333 48.25 48.2916666666667 48.3333333333333
6 1 1 1
48.4583333333333 48.5 48.5416666666667 48.5833333333333
2 5 1 1
48.8333333333333 48.9583333333333 49.0833333333333 49.125
5 2 1 2
49.2083333333333 49.25 49.375 49.4583333333333
1 2 3 4
49.5 49.625 49.7083333333333 49.875
1 2 2 1
49.9583333333333 50.0833333333333 50.1666666666667 50.2083333333333
1 3 1 1
50.2916666666667 50.3333333333333 50.375 50.4583333333333
1 1 3 1
50.5833333333333 50.625 50.7083333333333 50.7916666666667
1 1 1 2
50.8333333333333 50.875 50.9166666666667 50.9583333333333
1 1 2 2
51.0416666666667 51.0833333333333 51.2083333333333 51.25
1 1 2 3
51.3333333333333 51.375 51.4583333333333 51.7083333333333
2 1 2 4
51.75 51.7916666666667 51.8333333333333 51.9166666666667
2 4 3 1
52 52.0416666666667 52.0833333333333 52.125
1 1 2 1
52.1666666666667 52.2083333333333 52.25 52.3333333333333
1 1 1 1
52.375 52.4583333333333 52.5833333333333 52.625
1 2 3 1
52.6666666666667 52.7083333333333 52.75 52.7916666666667
1 1 2 3
52.875 52.9583333333333 53 53.0416666666667
2 1 1 4
53.0833333333333 53.1666666666667 53.2083333333333 53.25
3 1 4 1
53.3333333333333 53.4583333333333 53.5 53.5416666666667
1 2 4 1
53.5833333333333 53.7083333333333 53.75 53.7916666666667
4 1 1 1
53.8333333333333 53.875 53.9583333333333 54.125
2 1 2 1
54.1666666666667 54.2083333333333 54.2916666666667 54.3333333333333
1 2 4 1
54.4166666666667 54.4583333333333 54.5833333333333 54.625
3 3 2 1
54.6666666666667 54.75 54.7916666666667 54.8333333333333
1 1 2 3
54.875 54.9166666666667 54.9583333333333 55
2 3 1 2
55.0833333333333 55.1666666666667 55.2083333333333 55.25
3 1 1 2
55.2916666666667 55.4166666666667 55.4583333333333 55.7083333333333
2 1 2 3
55.75 55.7916666666667 55.8333333333333 55.9166666666667
1 5 2 1
56 56.0416666666667 56.0833333333333 56.1666666666667
1 1 1 2
56.2083333333333 56.25 56.2916666666667 56.3333333333333
2 1 1 1
56.375 56.4583333333333 56.5416666666667 56.5833333333333
1 3 4 3
56.625 56.6666666666667 56.7083333333333 56.75
1 1 4 1
56.8333333333333 56.875 56.9166666666667 56.9583333333333
2 1 2 3
57 57.0416666666667 57.0833333333333 57.1666666666667
1 3 2 2
57.2083333333333 57.3333333333333 57.4166666666667 57.4583333333333
2 5 2 2
57.5416666666667 57.5833333333333 57.6666666666667 57.7083333333333
3 1 1 2
57.7916666666667 57.8333333333333 57.9166666666667 57.9583333333333
2 1 1 1
58 58.0833333333333 58.125 58.1666666666667
3 3 1 5
58.2916666666667 58.3333333333333 58.375 58.4166666666667
2 2 1 1
58.4583333333333 58.5416666666667 58.5833333333333 58.6666666666667
1 2 1 1
58.7083333333333 58.75 58.7916666666667 58.8333333333333
2 1 5 5
58.875 58.9166666666667 58.9583333333333 59
4 2 2 1
59.0416666666667 59.0833333333333 59.25 59.2916666666667
1 1 1 2
59.3333333333333 59.4583333333333 59.5 59.5416666666667
1 2 1 4
59.5833333333333 59.6666666666667 59.7916666666667 59.8333333333333
2 3 4 3
59.9166666666667 59.9583333333333 60 60.0833333333333
1 1 1 2
60.1666666666667 60.2083333333333 60.2916666666667 60.375
1 1 1 2
60.4583333333333 60.5 60.5416666666667 60.5833333333333
2 4 1 1
60.6666666666667 60.75 60.7916666666667 60.8333333333333
2 1 1 2
60.875 60.9583333333333 61.0416666666667 61.0833333333333
2 1 1 2
61.125 61.1666666666667 61.2083333333333 61.25
1 3 1 1
61.2916666666667 61.3333333333333 61.4166666666667 61.4583333333333
2 1 1 3
61.5416666666667 61.5833333333333 61.6666666666667 61.7083333333333
5 2 4 2
61.7916666666667 61.875 61.9166666666667 61.9583333333333
4 3 4 2
62 62.0416666666667 62.0833333333333 62.125
3 3 2 2
62.1666666666667 62.2083333333333 62.25 62.2916666666667
3 1 1 2
62.375 62.4166666666667 62.5 62.5416666666667
4 1 2 4
62.5833333333333 62.625 62.6666666666667 62.7916666666667
2 4 3 1
62.8333333333333 62.875 62.9166666666667 63
1 2 3 1
63.0416666666667 63.0833333333333 63.1666666666667 63.2083333333333
2 3 6 1
63.25 63.375 63.4166666666667 63.4583333333333
1 3 1 1
63.5 63.5416666666667 63.5833333333333 63.625
1 1 1 3
63.6666666666667 63.75 63.7916666666667 63.875
2 3 2 1
63.9166666666667 64 64.0416666666667 64.0833333333333
3 1 4 1
64.1666666666667 64.2083333333333 64.25 64.2916666666667
4 1 4 2
64.375 64.4166666666667 64.4583333333333 64.5
2 1 1 2
64.5416666666667 64.625 64.6666666666667 64.75
3 3 4 3
64.7916666666667 64.9166666666667 64.9583333333333 65
1 4 1 2
65.0416666666667 65.0833333333333 65.125 65.1666666666667
3 1 1 5
65.2083333333333 65.25 65.2916666666667 65.3333333333333
1 2 3 2
65.375 65.4166666666667 65.4583333333333 65.5
3 6 2 5
65.5833333333333 65.625 65.6666666666667 65.75
1 8 5 3
65.7916666666667 65.8333333333333 65.875 65.9166666666667
2 2 5 5
66 66.0416666666667 66.125 66.1666666666667
4 4 1 6
66.25 66.2916666666667 66.375 66.4166666666667
5 2 2 3
66.4583333333333 66.5 66.5416666666667 66.5833333333333
1 6 1 1
66.625 66.6666666666667 66.7083333333333 66.75
2 2 3 4
66.7916666666667 66.8333333333333 66.875 66.9166666666667
3 1 4 3
66.9583333333333 67 67.0416666666667 67.125
1 4 3 4
67.1666666666667 67.25 67.2916666666667 67.375
4 1 2 3
67.4166666666667 67.5 67.5416666666667 67.5833333333333
4 3 1 1
67.625 67.6666666666667 67.75 67.7916666666667
6 4 3 1
67.8333333333333 67.875 67.9166666666667 68
2 5 2 4
68.0416666666667 68.125 68.1666666666667 68.25
3 4 3 2
68.2916666666667 68.375 68.4166666666667 68.5
6 3 6 3
68.5416666666667 68.5833333333333 68.625 68.6666666666667
3 1 3 6
68.75 68.7916666666667 68.875 68.9166666666667
5 3 2 3
69 69.0416666666667 69.125 69.1666666666667
5 2 4 1
69.25 69.2916666666667 69.375 69.4166666666667
11 5 9 2
69.5 69.5416666666667 69.5833333333333 69.625
3 4 1 7
69.6666666666667 69.75 69.7916666666667 69.875
6 5 2 7
69.9166666666667 70 70.0416666666667 70.0833333333333
2 4 1 1
70.125 70.1666666666667 70.25 70.2916666666667
5 5 5 3
70.3333333333333 70.375 70.4166666666667 70.5
1 6 4 4
70.5416666666667 70.625 70.75 70.7916666666667
3 6 8 1
70.875 70.9166666666667 71 71.0833333333333
14 1 3 1
71.125 71.1666666666667 71.25 71.375
6 1 11 13
71.4166666666667 71.5 71.5416666666667 71.625
6 8 3 7
71.6666666666667 71.75 71.7916666666667 71.875
3 8 4 7
71.9166666666667 72 72.125 72.1666666666667
3 11 7 3
72.25 72.2916666666667 72.375 72.4166666666667
17 3 9 3
72.5 72.5416666666667 72.625 72.6666666666667
8 4 7 2
72.75 72.875 72.9166666666667 73
6 12 1 9
73.0416666666667 73.125 73.1666666666667 73.25
5 16 2 7
73.375 73.4166666666667 73.5 73.625
18 7 13 9
73.6666666666667 73.75 73.7916666666667 73.875
4 14 4 9
73.9166666666667 74 74.0416666666667 74.125
4 13 2 10
74.1666666666667 74.25 74.375 74.4166666666667
3 15 20 1
74.5 74.625 74.6666666666667 74.75
12 19 5 13
74.7916666666667 74.875 74.9166666666667 75
1 10 2 19
75.0416666666667 75.125 75.1666666666667 75.25
1 8 1 17
75.375 75.4166666666667 75.5 75.625
28 2 16 16
75.6666666666667 75.75 75.7916666666667 75.875
5 19 1 20
75.9166666666667 76 76.0416666666667 76.125
6 14 5 14
76.1666666666667 76.25 76.375 76.5
1 18 15 13
76.5416666666667 76.625 76.6666666666667 76.75
1 20 1 12
76.875 76.9166666666667 77 77.125
17 3 24 10
77.1666666666667 77.25 77.2916666666667 77.375
1 19 2 24
77.4166666666667 77.5 77.625 77.75
2 16 34 20
77.875 77.9166666666667 78 78.125
31 3 26 9
78.1666666666667 78.25 78.375 78.4166666666667
5 18 14 2
78.5 78.625 78.75 78.7916666666667
29 27 33 1
78.875 78.9166666666667 79 79.0416666666667
37 2 32 1
79.125 79.1666666666667 79.25 79.375
16 1 39 17
79.4166666666667 79.5 79.5416666666667 79.625
5 43 1 22
79.6666666666667 79.75 79.875 80
1 57 46 37
80.125 80.1666666666667 80.25 80.375
35 1 31 10
80.4166666666667 80.5 80.625 80.6666666666667
3 52 10 2
80.75 80.7916666666667 80.875 81
41 1 36 55
81.125 81.25 81.375 81.4166666666667
57 36 18 1
81.5 81.625 81.75 81.875
35 10 67 14
82 82.125 82.1666666666667 82.25
95 46 1 47
82.375 82.4166666666667 82.5 82.625
38 1 40 6
82.6666666666667 82.75 82.875 83
1 71 9 127
83.125 83.25 83.375 83.4166666666667
28 76 37 1
83.5 83.625 83.75 83.875
39 16 64 5
84 84.125 84.25 84.375
129 17 152 26
84.5 84.625 84.75 84.875
36 15 43 5
85 85.125 85.1666666666667 85.25
107 3 1 195
85.375 85.5 85.625 85.75
15 119 22 37
85.875 86 86.125 86.25
5 57 2 186
86.375 86.4166666666667 86.5 86.625
3 1 233 14
86.75 86.875 87 87.125
55 8 24 2
87.25 87.375 87.5 87.625
84 1 275 3
87.75 87.875 88 88.125
136 6 33 6
88.25 88.5 88.75 88.875
39 162 238 7
89 89.125 89.25 89.5
61 4 21 64
89.75 89.875 90 90.125
201 2 110 5
90.25 90.5 90.75 91
14 21 102 161
91.125 91.25 91.375 91.5
4 48 1 4
91.75 92 92.25 92.375
38 116 89 1
92.5 92.75 93 93.25
11 2 63 88
93.5 93.625 93.75 94
29 1 1 1
94.25 94.5 94.75 95.5
80 48 1 118
95.75 96 96.5 96.75
5 1 4 18
97.5 97.75 98 98.75
1 2 2 1
99 100
1 4
Code
data_scale2$ SF_mh <- log (data_scale$ SF_mh)
table (data_scale2$ SF_mh)
1.09861228866811 2.12525107771113 2.42036812865043 2.47443534992071
1 1 1 1
2.52572864430826 2.62103882411258 2.69968195143169 2.72457950305342
1 1 1 1
2.74887219562247 2.78809290877575 2.82583323675859 2.84053938414829
1 1 1 2
2.84781214347737 2.86220088092947 2.9041650800285 2.9109910450989
2 1 1 1
2.95751106073379 2.9704144655697 2.99573227355399 3.02042488614436
2 1 1 2
3.02650393222074 3.05045717324324 3.10234200861225 3.10608033072286
1 1 1 1
3.11905548958599 3.13549421592915 3.18324864722505 3.18841661738349
3 1 1 1
3.19867311755068 3.20376218705815 3.2088254890147 3.21386328304466
1 1 1 1
3.23867845216438 3.24356843745857 3.24843462710975 3.25327725158553
2 1 3 1
3.25649268843951 3.26766598903763 3.27241659179623 3.27714473299218
1 2 2 2
3.28653447334202 3.30045581186062 3.30505352110925 3.31872115983792
1 2 2 2
3.32323584019244 3.3322045101752 3.33961744256433 3.34109345759245
2 2 1 1
3.34403896782221 3.34550847580157 3.3499040872746 3.3542804618744
1 1 2 2
3.37587957367787 3.3886185994553 3.39142759006635 3.3998075273731
1 2 1 2
3.40535539181082 3.40811782450673 3.40949618447685 3.41772668361337
2 1 1 2
3.42588999425253 3.43398720448515 3.43801135478487 3.44201937618241
1 2 1 2
3.44998754583159 3.45394794704768 3.46182200347859 3.46963454321538
3 1 4 3
3.47351804324178 3.47609868983527 3.4773865200197 3.48124008933569
2 1 1 1
3.48890296208126 3.49271249049793 3.50028828430639 3.50780711672041
3 1 2 2
3.51154543883102 3.51402991215868 3.515269837922 3.51774508671055
3 1 2 1
3.51898041731854 3.52267727919986 3.52513428289292 3.52636052461616
4 6 1 1
3.53003025350512 3.53368656470823 3.53732955598674 3.54095932403731
1 1 5 1
3.5445759645075 3.5469798118189 3.5481795720108 3.55177024014153
2 2 1 1
3.55534806148941 3.55891312765391 3.56128279700923 3.56246552925828
3 4 1 2
3.56482680544396 3.5660053559634 3.56953269648137 3.57304763858881
1 2 2 1
3.57655026914002 3.58583107821449 3.5869851464326 3.58928929491745
1 1 2 1
3.59043938130068 3.59388172549166 3.60073106733723 3.60413822565885
2 1 7 1
3.60753381465998 3.60979115196163 3.61316763237824 3.61429059712286
2 1 1 1
3.62322920412367 3.62434093297637 3.62766872306904 3.63429126382953
1 1 4 1
3.63648906691201 3.64414356027254 3.64740620590736 3.64957540415491
1 1 2 4
3.65065824129374 3.65282040429823 3.65389973521791 3.65713075579936
2 1 1 1
3.65927898433765 3.6603513704994 3.66249269894074 3.66356164612965
1 2 1 1
3.66676164886032 3.66888930923743 3.66995144422842 3.6731310971458
1 1 1 1
3.67630067190708 3.67840815424664 3.67946023219744 3.68260984110034
2 1 2 2
3.68574956110501 3.69096062031776 3.69199958145018 3.69407427099104
1 1 1 1
3.69717825692863 3.69821078154282 3.70130197411249 3.70335747329459
2 1 1 1
3.7043836406499 3.70643282169484 3.70745583968687 3.71051862921742
1 1 3 2
3.71357206670431 3.71661620908554 3.71762886739992 3.71864050127477
2 3 1 1
3.71965111278069 3.72468890681065 3.72569342723665 3.72769944596352
1 1 1 1
3.73070094896727 3.73169945129686 3.73369346990373 3.73667706237062
3 2 2 1
3.73766961828337 3.73965177948736 3.74064138867253 3.74261767390074
1 3 3 2
3.74360435380318 3.74557479779048 3.74852320287478 3.74950407593037
1 1 3 1
3.75439406122456 3.75536919538277 3.7582889054861 3.76023065366901
1 9 4 1
3.76120011569356 3.76410287535152 3.76699723337789 3.76892216178747
1 2 2 1
3.76988323826702 3.77276093809464 3.77563038052259 3.77753877804835
2 2 1 1
3.77849161280362 3.78324221556222 3.78418963391826 3.78702651525346
4 2 3 2
3.78797035675817 3.78891330826604 3.78985537145394 3.79173683955364
1 2 1 2
3.79267624779558 3.79455242095381 3.7982942400998 3.80015991228275
2 2 1 1
3.80295191037378 3.81128143562661 3.81220267014594 3.81404259706794
1 1 2 1
3.81496129258501 3.81679615548513 3.81862765782859 3.81954215263398
4 1 1 3
3.82228062992728 3.82592030637473 3.83945231259331 3.84034796872126
2 1 2 1
3.8412428233671 3.84213687796398 3.84392259272421 3.8448142557347
3 1 2 2
3.84659520010569 3.84926068369183 3.85103373380172 3.85280364576817
3 1 1 1
3.85457043068006 3.85545265393975 3.85985213309924 3.8607297110406
1 2 2 3
3.86248255986801 3.8642323415918 3.86510608564039 3.86772274653157
1 1 1 2
3.86859344750081 3.87033257837394 3.87120101090789 3.87380179260795
3 1 2 1
3.87553189684573 3.87639582778499 3.87725901299181 3.87812145375246
6 1 1 1
3.88070432217072 3.88156379794344 3.88242253565186 3.88328053656249
2 5 1 1
3.88841313978901 3.89096959623031 3.89351953386359 3.89436807018943
5 2 1 2
3.89606298584942 3.8969093676181 3.89944422322129 3.90113056426172
1 2 3 4
3.90197266957464 3.90449473900735 3.90617259174997 3.90951987521003
1 2 2 1
3.91118932467957 3.91368828474721 3.91535079552082 3.91618101557681
1 3 1 1
3.91783939074959 3.91866754814681 3.91949502026685 3.92114791320515
1 1 3 1
3.9236221412715 3.9244455254267 3.92609026263958 3.92773229913333
1 1 1 2
3.92855230737936 3.92937164376276 3.93019030938359 3.93100830533923
1 1 2 2
3.93264229263088 3.93345828614821 3.93590227921809 3.93671561801852
1 1 2 3
3.93834031374552 3.9391516728164 3.94077241871413 3.94561895485666
2 1 2 4
3.94642443214548 3.94722926116277 3.94803344295118 3.94963986899945
2 4 3 1
3.95124371858143 3.95204467977763 3.9528449999484 3.95364468011897
1 1 2 1
3.9544437213121 3.95524212454812 3.95603989084492 3.9576335166802
1 1 1 1
3.9584293782423 3.9600192036964 3.96239921275321 3.96319129200255
1 2 3 1
3.96398274435886 3.96477357081368 3.96556377235618 3.96635334997319
1 1 2 3
3.96793063736644 3.96950544084151 3.97029191355212 3.97107776820946
2 1 1 4
3.97186300578416 3.97343163355679 3.97421502568459 3.97499780458953
3 1 4 1
3.97656152656572 3.97890253426769 3.97968165390196 3.98046016698137
1 2 4 1
3.98123807444962 3.98356817259124 3.98434366700777 3.9851185604987
4 1 1 1
3.9858928539946 3.98666654842391 3.98821214378569 3.99129618632265
2 1 2 1
3.99206571310168 3.99283464816456 3.9943707467769 3.99513791213865
1 2 4 1
3.99667047948843 3.99743588327628 3.99972858584725 4.00049165341575
3 3 2 1
4.00125413915609 4.00277736869661 4.00353811426392 4.00429828153732
1 1 2 3
4.00505787139534 4.00581688471451 4.00657532236937 4.00733318523247
2 3 1 2
4.00884719006369 4.01035890614901 4.01111390807238 4.01186834039786
3 1 1 2
4.01262220398426 4.01488039086785 4.01563198804717 4.020129746754
2 1 2 3
4.02087741034023 4.02162451534323 4.02237106259701 4.02386248718368
1 5 2 1
4.02535169073515 4.02609546168799 4.02683867985673 4.02832346112431
1 1 1 2
4.02906502585981 4.02980604108453 4.03054650761225 4.03128642625496
2 1 1 1
4.03202579782284 4.03350290296586 4.03497782948692 4.0357144777707
1 3 4 3
4.0364505838032 4.03718614838215 4.03792117230352 4.03865565636151
1 1 4 1
4.04012300805546 4.04085587727111 4.04158820978279 4.042320006376
2 1 2 3
4.04305126783455 4.0437819949405 4.04451218847422 4.04597097793788
1 3 2 2
4.04669957542003 4.04888218814534 4.05033462122566 4.05106004744536
2 5 2 2
4.05250932306135 4.05323317397967 4.05467930582967 4.05540158827349
3 1 1 2
4.05684458996689 4.0575653107188 4.05900519577679 4.0597243615755
2 1 1 1
4.06044301054642 4.06187876097252 4.06259586390752 4.06331245297437
3 3 1 5
4.06545914431754 4.0661736852554 4.06688771598906 4.06760123724659
2 2 1 1
4.06831424975451 4.0697387514199 4.07045024202266 4.07187170637004
1 2 1 1
4.07258168155073 4.07329115302427 4.07400012150487 4.07470858770524
2 1 5 5
4.07541655233658 4.07612401610857 4.07683097972939 4.07753744390572
4 2 2 1
4.07824340934273 4.07894887674413 4.08176578001524 4.08246876774191
1 1 1 2
4.08317126162398 4.08527578712889 4.08597631255158 4.08667634758192
1 2 1 4
4.08737589290601 4.08877351717264 4.09086629784578 4.09156291926022
2 3 4 3
4.09295470793305 4.09364987653942 4.0943445622221 4.09573248749695
1 1 1 2
4.09711848910483 4.09781077019859 4.09919389628354 4.10057511197274
1 1 1 2
4.10195442253624 4.1026433650368 4.10333183322234 4.10401982774552
2 4 1 1
4.10539439840869 4.10676708222066 4.10745271817484 4.10813788435444
2 1 1 2
4.10882258140275 4.11019057067218 4.11155669110322 4.11223905209865
2 1 1 2
4.11292094779504 4.11360237882652 4.11428334582593 4.11496384942484
1 3 1 1
4.11564389025349 4.11632346894088 4.11768124240134 4.11835943842597
2 1 1 3
4.11971445218343 4.1203912711602 4.12174353641022 4.12241898391985
5 2 4 2
4.12376851178999 4.12511622088885 4.12578939492976 4.12646211611221
4 3 4 2
4.12713438504509 4.12780620233606 4.12847756859156 4.12914848441679
3 3 2 2
4.12981895041576 4.13048896719125 4.13115853534482 4.13182765547684
3 1 1 2
4.13316455407168 4.13383233372922 4.13516655674236 4.13583300128552
4 1 2 4
4.13649900197613 4.13716455940503 4.13782967416184 4.13982236827855
2 4 3 1
4.14048571821996 4.1411486284199 4.14181109946102 4.14313472639153
1 2 3 1
4.14379588344041 4.14445660364945 4.14577673585437 4.14643614900059
2 3 6 1
4.14709512760763 4.14906946191135 4.14972670807369 4.15038352254722
1 3 1 1
4.15103990589865 4.15169585869357 4.15235138149646 4.15300647487069
1 1 1 3
4.15366113937852 4.15496918403854 4.15562256530974 4.15692804852387
2 3 2 1
4.15758015157926 4.15888308335967 4.15953391319065 4.16018431971764
3 1 4 1
4.16148386505973 4.16213300497217 4.16278172377533 4.16343002201522
4 1 4 2
4.1647253589839 4.16537239879942 4.16601902022512 4.16666522380173
2 1 1 2
4.16731101006892 4.16860133282859 4.16924587039522 4.17053370057965
3 3 4 3
4.17117699426539 4.17310439608275 4.17374603870983 4.17438726989564
1 4 1 2
4.17502809016749 4.17566850005169 4.17630850007353 4.17694809075731
3 1 1 5
4.17758727262631 4.1782260462028 4.17886441200807 4.17950237056241
1 2 3 2
4.18013992238509 4.18077706799441 4.18141380790768 4.18205014264121
3 6 2 5
4.1833215986294 4.18395672091179 4.18459144006988 4.18585967105787
1 8 5 3
4.1864931839077 4.18712629567307 4.18775900686153 4.18839131797965
2 2 5 5
4.18965474202643 4.19028585596344 4.19154689017846 4.19217681145914
4 4 1 6
4.19343546486633 4.19406419798984 4.1953204795621 4.19594802900221
5 2 2 3
4.196575184871 4.19720194766181 4.19782831786707 4.19845429597827
1 6 1 1
4.19907988248601 4.19970507787993 4.20032988264877 4.20095429728036
2 2 3 4
4.20157832226161 4.20220195807851 4.20282520521617 4.20344806415876
3 1 4 3
4.20407053538957 4.20469261939097 4.20531431664444 4.20655655282903
1 4 3 4
4.20717709271863 4.20841701848195 4.20903640530881 4.21027402922916
4 1 2 3
4.21089226727049 4.21212759787848 4.21274469138773 4.21336140432741
4 3 1 1
4.21397773716665 4.21459369037368 4.21582445975981 4.21643927687109
6 4 3 1
4.21705371621454 4.2176677782541 4.21828146345286 4.21950770517611
2 5 2 4
4.22012026262252 4.22134425298341 4.22195568681475 4.22317743406507
3 4 3 2
4.22378774839588 4.22500726074214 4.22561645966443 4.22683374526818
6 3 6 3
4.22744183285153 4.22804955088907 4.22865689982969 4.22926388012147
3 1 3 6
4.23047673654668 4.23108261357218 4.23229326747308 4.23289804523569
5 3 2 3
4.23410650459726 4.23471018707862 4.2359164598425 4.23651905100264
5 2 4 1
4.23772314506745 4.23832464884498 4.2395265720666 4.24012699237884
11 5 9 2
4.24132675257075 4.24192609331389 4.24252507506286 4.24312369824745
3 4 1 7
4.2437219632967 4.24491742070147 4.24551461391122 4.24670793147526
6 5 2 7
4.24730405667921 4.24849524204936 4.24909030306067 4.24968501018495
2 4 1 1
4.25027936384286 4.25087336445433 4.25206030821386 4.25265325219802
5 5 5 3
4.25324584480796 4.25383808645985 4.25442997756917 4.25561270981822
1 6 4 4
4.25620355178519 4.25738418946661 4.25915253652335 4.25974129132399
3 6 8 1
4.26091776204792 4.26150547878537 4.26267987704132 4.26385289770368
14 1 3 1
4.26443889244649 4.26502454400057 4.26619481914876 4.26794766797617
6 1 11 13
4.26853126880978 4.26969744969996 4.27028003054953 4.2714441750349
6 8 3 7
4.27202573945955 4.27318785463973 4.27376840617998 4.27492849911751
3 8 4 7
4.27550804129543 4.27666611901606 4.27840072482826 4.27897825877443
3 11 7 3
4.28013232699254 4.28070886203301 4.28186093589316 4.28243647547739
17 3 9 3
4.28358656186063 4.28416110942024 4.28530921517208 4.28588277412098
8 4 7 2
4.2870289060516 4.28874564467066 4.28931723656961 4.29045944114839
6 12 1 9
4.29103005457329 4.29217030555202 4.29273994384712 4.29387824789718
5 16 2 7
4.29558327814826 4.29615097614818 4.29728540621879 4.29898464197175
18 7 13 9
4.29955041284964 4.30068099521993 4.30124580743489 4.30237447572626
4 14 4 9
4.30293833252158 4.30406509320417 4.30462799780671 4.30575285731789
4 13 2 10
4.30631481293818 4.30743777768281 4.30911986386579 4.3096799310885
3 15 20 1
4.31079912538551 4.31247557171277 4.31303376318693 4.3141492122708
12 19 5 13
4.31470647057443 4.31582005643561 4.31637638468362 4.31748811353631
1 10 2 19
4.31804351482801 4.31915339285537 4.31970787027462 4.32081590362899
1 8 1 17
4.32247565504735 4.32302829391193 4.32413265625498 4.32578691635101
28 2 16 16
4.32633772881329 4.32743844438948 4.32798834817018 4.32908724937966
5 19 1 20
4.32963624747196 4.33073334028633 4.33128143566865 4.33237672603006
6 14 5 14
4.33292392166615 4.33401741548752 4.33565541749176 4.33729074083249
1 18 15 13
4.33783525486718 4.33892339425638 4.33946702025509 4.34055338646731
1 20 1 12
4.34218072612668 4.34272258471485 4.34380542185368 4.34542748222555
17 3 24 10
4.34596758485818 4.34704691577786 4.34758614469359 4.34866373100476
1 19 2 24
4.34920208902584 4.3502779363593 4.35188954025364 4.35349855105934
2 16 34 20
4.35510497710762 4.35563987950069 4.35670882668959 4.35831010805657
31 3 26 9
4.35884329921822 4.35990882942026 4.36150499895308 4.36203648979738
5 18 14 2
4.36309862478836 4.3646897150206 4.36627827770574 4.36680723831051
29 27 33 1
4.36786432086138 4.36839244339808 4.36944785246702 4.36997513958707
37 2 32 1
4.37102888046434 4.37155533480659 4.37260741275739 4.37418345721286
16 1 39 17
4.3747082538662 4.37575702166029 4.3762809933778 4.37732811389233
5 43 1 22
4.3778512632634 4.37889674166495 4.3804629126977 4.38202663467388
1 57 46 37
4.38358791524083 4.38410780087771 4.38514676201012 4.38670318255778
35 1 31 10
4.38722145155099 4.38825718442452 4.38980877511594 4.39032543748858
3 52 10 2
4.39135796210277 4.39187382489471 4.39290475282106 4.39444915467244
41 1 36 55
4.39599117502425 4.39753082120985 4.39906810052873 4.39958000225478
57 36 18 1
4.40060302024682 4.40213558759659 4.40366580977736 4.40519369395542
35 10 67 14
4.40671924726425 4.40824247680477 4.40874970481464 4.40976338964548
95 46 1 47
4.41128199282267 4.41178768183471 4.41279829334063 4.41431229817185
38 1 40 6
4.41481645749687 4.41582401425717 4.41733344850603 4.4188406077966
1 71 9 127
4.42034549897602 4.42184812886055 4.42334850423579 4.42384812952722
28 76 37 1
4.42484663185681 4.42634251844839 4.42783617070518 4.42932759529185
39 16 64 5
4.43081679884331 4.43230378796489 4.43378856923247 4.43527114919269
129 17 152 26
4.43675153436313 4.43822973123244 4.43970574626056 4.44117958587886
36 15 43 5
4.44265125649032 4.44412076446968 4.44461012097565 4.44558811616363
107 3 1 195
4.44705331789095 4.44851637594271 4.44997729658239 4.45143608604605
15 119 22 37
4.45289275054251 4.45434729625351 4.45579972933382 4.45725005591147
5 57 2 186
4.45869828208783 4.45918055844153 4.46014441393783 4.46158845751007
3 1 233 14
4.46303041882697 4.46447030388496 4.46590811865458 4.46734386908069
55 8 24 2
4.46877756108254 4.47020920055397 4.47163879336357 4.47306634535475
84 1 275 3
4.47449186234598 4.47591535013083 4.47733681447821 4.47875626113243
136 6 33 6
4.48017369581341 4.48300255201388 4.48582342835553 4.48723088812341
39 162 238 7
4.48863636973214 4.49003987873446 4.49144142065975 4.49423862528081
61 4 21 64
4.49702802736839 4.49841981604121 4.49980967033027 4.50119759560511
201 2 110 5
4.50258359721299 4.50534985070588 4.50810847314496 4.51085950651685
14 21 102 161
4.51223219032882 4.5136029924626 4.51497191806994 4.51633897228148
4 48 1 4
4.51906748693468 4.52178857704904 4.52450228292064 4.52585637926837
38 116 89 1
4.52720864451838 4.52990770148754 4.53259949315326 4.53528405852393
11 2 63 88
4.53796143629464 4.53929744183738 4.54063166485052 4.54329478227
29 1 1 1
4.54595082632812 4.5485998344997 4.55124184396254 4.55912624748668
80 48 1 118
4.56174062806076 4.56434819146784 4.56954300834494 4.57213033190989
5 1 4 18
4.5798523780038 4.58241319886548 4.58496747867057 4.59259140378123
1 2 2 1
4.59511985013459 4.60517018598809
1 4
Code
SF_mh_preF_factor_log<- lm (log (SF_mh) ~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale,
subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
plot (SF_mh_preF_factor_log, 2 )
Code
shapiro.test (resid (SF_mh_preF_factor_log))
Shapiro-Wilk normality test
data: resid(SF_mh_preF_factor_log)
W = 0.70988, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_mh_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.066108 1 1.032525
Weight 1.207108 1 1.098685
Height 1.140604 1 1.067991
as.factor(Intervention_months_factor) 1.003858 4 1.000481
Postmenopausal females:
Code
#### FEMALE post
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity bad, constant variance looks bad (heteroscedasticity)
plot (SF_mh_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_mh_postF, which = 3 ) # Linearity bad, constant variance bad
Code
# formal test in the form of Breusch-Pagan
bptest (SF_mh_postF) # reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_mh_postF
BP = 22.92, df = 7, p-value = 0.00176
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_mh_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_mh_postF)) #reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_mh_postF)
W = 0.80728, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_mh_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_mh_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3296 13.8 11.0 19.8 14.2 0 80.2 -66.5 0.0102 11.8
2 5278 29.7 5.98 36.8 4.24 2 76.7 -47.0 0.0129 11.9
3 6670 22.6 3.98 1.81 -9.76 4 77.8 -55.2 0.00898 11.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_mh), almha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 9 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 514 27.1 7.98 -6.19 6.24 0 80.9 -53.7 0.00520 11.9
2 1423 25.8 4.98 -13.2 -6.76 2 79.7 -53.9 0.00491 11.9
3 1438 19.2 8.98 11.8 -5.76 2 78.7 -59.4 0.00516 11.9
4 1689 27.2 3.98 -4.19 -1.76 0 78.8 -51.6 0.00317 11.9
5 3296 13.8 11.0 19.8 14.2 0 80.2 -66.5 0.0102 11.8
6 3954 33.1 17.0 -2.19 -16.8 2 80.9 -47.8 0.00554 11.9
7 4753 28.2 4.98 8.81 -8.76 0 77.2 -49.0 0.00407 11.9
8 5844 31.5 11.0 -12.2 -17.8 0 79.8 -48.3 0.00355 11.9
9 6670 22.6 3.98 1.81 -9.76 4 77.8 -55.2 0.00898 11.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_mh_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
Code
# log transform to pass normality.
SF_mh_postF_factor_fix <- lm (log1p (SF_mh) ~ log1p (Age) + log1p (Weight) + log1p (Height) + Intervention_months_factor,
data = data_scale,
subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
qqnorm (resid (SF_mh_postF_factor_fix), col = "grey" )
qqline (resid (SF_mh_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_mh_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(SF_mh_postF_factor_fix)
W = 0.65258, p-value = 1.188e-15
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_mh_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.035224 1 1.017459
Weight 1.147491 1 1.071210
Height 1.172199 1 1.082681
as.factor(Intervention_months_factor) 1.011478 4 1.001428
Males:
Code
### MALE
### LINEARITY (Lnearity and constant variance assumptions)
#Fitted versus Residuals Plot: Good
plot (SF_mh_M, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (SF_mh_M, which = 3 ) # suspect
Code
# formal test in the form of Breusch-Pagan
bptest (SF_mh_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: SF_mh_M
BP = 24.332, df = 7, p-value = 0.0009959
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (SF_mh_M, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (SF_mh_M)) # reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(SF_mh_M)
W = 0.7886, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (SF_mh_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (SF_mh_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rownames SF_mh Age[,1] Weight…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3057 35.1 13.0 71.8 2.24 4 81.2 -46.0 0.0117 11.9
2 4049 19.5 -22.0 56.8 5.24 1 76.0 -56.5 0.00963 11.9
3 6804 3 -2.02 29.8 5.24 4 80.0 -77.0 0.00331 11.9
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = SF_mh), almha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 90 × 13
.rownames SF_mh Age[,1] Weigh…¹ Heigh…² as.fa…³ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 423 39.6 -9.02 -18.2 3.24 2 79.8 -40.2 3.02e-3 11.9
2 500 39.2 -4.02 -2.19 18.2 0 82.0 -42.8 1.94e-3 11.9
3 501 42.5 -14.0 1.81 3.24 0 79.1 -36.7 1.10e-3 11.9
4 581 38.5 -12.0 -7.19 1.24 0 79.5 -41.1 1.23e-3 11.9
5 662 31 24.0 16.8 1.24 0 84.9 -53.9 1.38e-3 11.9
6 759 38.2 12.0 -6.19 3.24 0 83.7 -45.4 1.02e-3 11.9
7 856 42.8 7.98 -6.19 3.24 0 83.0 -40.3 9.35e-4 11.9
8 897 31.9 -9.02 66.8 23.2 2 79.1 -47.2 8.56e-3 11.9
9 959 37.6 -5.02 13.8 8.24 0 80.7 -43.1 8.20e-4 11.9
10 974 41.2 15.0 17.8 7.24 0 83.8 -42.6 8.88e-4 11.9
# … with 80 more rows, 3 more variables: .cooksd <dbl>, .std.resid <dbl>,
# index <int>, and abbreviated variable names ¹Weight[,1], ²Height[,1],
# ³`as.factor(Intervention_months_factor)`
Code
#Cook's distance
cooksd = cooks.distance (SF_mh_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# log transform to pass normality.
SF_mh_Mfactor_fix <- lm (SF_mh~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (SF_mh_Mfactor_fix), col = "grey" )
qqline (resid (SF_mh_Mfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (SF_mh_Mfactor_fix))
Shapiro-Wilk normality test
data: resid(SF_mh_Mfactor_fix)
W = 0.68271, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (SF_mh_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.093016 1 1.045474
Weight 1.274398 1 1.128892
Height 1.251832 1 1.118853
as.factor(Intervention_months_factor) 1.007197 4 1.000897
We conclude that the assumptions were violated and thus we use robust models.
Robust models
Code
SF_mh_preF_robust <- lmrob (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
SF_mh_postF_robust <- lmrob (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
SF_mh_Mrobust <- lmrob (SF_mh ~ Age + Weight + Height + as.factor (Intervention_months_factor), data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (SF_mh_preF_robust, SF_mh_postF_robust, SF_mh_Mrobust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Robust: Mental health (Sf-36), higher score indicates better health" , show.reflvl = T)
Robust: Mental health (Sf-36), higher score indicates better health
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
83.37
82.56 – 84.18
<0.001
82.70
81.68 – 83.72
<0.001
84.61
84.16 – 85.06
<0.001
Age
0.13
0.08 – 0.18
<0.001
0.13
0.07 – 0.18
<0.001
0.11
0.09 – 0.13
<0.001
Weight
-0.00
-0.04 – 0.03
0.896
-0.05
-0.08 – -0.01
0.014
-0.02
-0.04 – 0.01
0.140
Height
0.02
-0.05 – 0.08
0.591
0.05
-0.01 – 0.12
0.104
0.05
0.01 – 0.09
0.008
6-12 months
-1.25
-2.25 – -0.24
0.016
-0.25
-1.29 – 0.79
0.638
0.10
-0.56 – 0.75
0.771
12-18 months
-0.04
-1.01 – 0.92
0.929
0.30
-0.75 – 1.34
0.579
-0.46
-1.13 – 0.22
0.184
18-24 months
-1.29
-2.90 – 0.32
0.117
-1.37
-3.16 – 0.42
0.133
-0.16
-1.10 – 0.79
0.745
24+ months
-0.98
-2.14 – 0.18
0.098
0.06
-1.13 – 1.24
0.925
-0.34
-1.02 – 0.34
0.330
Observations
2404
1476
3595
R2 / R2 adjusted
0.019 / 0.016
0.022 / 0.017
0.058 / 0.056
Warm glow
Warm glow is defined by a score of >= 6 on average on warm glow questions (3 questions).
Predict by ferritin
Code
# Barplot
data_scale_per2 <- data_scale %>%
group_by (Fergroup) %>%
count (Warmglow) %>%
mutate (
Percentage = round (proportions (n) * 100 , 1 ),
res = str_c (n, "(" , Percentage, ")%" )
)
ggplot (subset (data_scale_per2, ! is.na (Warmglow)), aes (Fergroup, Percentage, fill = Warmglow)) +
geom_col (position = "dodge" ) +
geom_text (aes (label = res), vjust = - 0.2 )+
theme_bw ()
Code
ggplot (data_scale , mapping = aes (x = Fergroup, fill = Warmglow)) +
geom_bar (position = "dodge" ) +
theme_bw ()
Code
# t-test + boxplot
res <- t.test (Ferritin ~ Warmglow, data = data_scale, var.equal = TRUE )
res
Two Sample t-test
data: Ferritin by Warmglow
t = -0.48007, df = 6602, p-value = 0.6312
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-2.511109 1.523153
sample estimates:
mean in group No mean in group Yes
37.39408 37.88806
Code
ggplot (subset (data_scale, ! is.na (Warmglow)), aes (x= Warmglow, y= Ferritin)) +
geom_boxplot () +
theme_bw () +
geom_text (x = 2.3 , y = 700 , label= paste ('Means:' , " " , means$ Ferritin[1 ], " " , means$ Ferritin[2 ], " \n " , 't(' ,res$ parameter, '), p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Warm glow" , y= "Ferritin" )
Models
Code
WG_preF <- glm (Warmglow ~ Age + Weight + Height + Intervention_months_factor , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 ,family = binomial)
WG_postF <- glm (Warmglow ~ Age + Weight + Height + Intervention_months_factor , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 ,family = binomial)
WG_M <- glm (Warmglow ~ Age + Weight + Height + Intervention_months_factor , data = data_scale, subset = data_scale$ Gender == "Male" ,family = binomial)
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (WG_preF, WG_postF, WG_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Warmglow, yes/no" , show.reflvl = T)
Warmglow, yes/no
Pre-menopausal women
Post-menopausal women
Men
Predictors
Odds Ratios
CI
p
Odds Ratios
CI
p
Odds Ratios
CI
p
Intercept
1.57
1.27 – 1.95
<0.001
1.58
1.18 – 2.11
0.002
1.53
1.35 – 1.73
<0.001
Age
1.03
1.02 – 1.04
<0.001
1.01
1.00 – 1.03
0.108
1.02
1.02 – 1.03
<0.001
Weight
1.00
0.99 – 1.01
0.954
0.99
0.98 – 1.00
0.265
1.01
1.00 – 1.01
0.030
Height
1.00
0.99 – 1.02
0.596
0.99
0.97 – 1.01
0.320
1.00
0.99 – 1.01
0.761
Intervention_months_factor1
0.99
0.77 – 1.26
0.911
0.86
0.63 – 1.18
0.355
0.83
0.67 – 1.02
0.078
Intervention_months_factor2
1.41
1.11 – 1.80
0.005
0.80
0.60 – 1.07
0.135
0.97
0.79 – 1.19
0.767
Intervention_months_factor3
1.03
0.75 – 1.40
0.866
0.43
0.27 – 0.68
<0.001
0.72
0.55 – 0.95
0.019
Intervention_months_factor4
1.06
0.81 – 1.39
0.655
0.90
0.62 – 1.34
0.605
0.81
0.65 – 1.00
0.053
Observations
2167
1403
3386
R2 Tjur
0.016
0.014
0.030
Assumptions
Premenopausal females:
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot (WG_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (WG_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 13
.rowna…¹ Warmg…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .std.…⁶ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 275 No -6.02 49.8 5.24 2 0.637 -1.46 -1.47 0.0125
2 4031 No 0.985 36.8 1.24 3 0.503 -1.40 -1.41 0.0118
3 5873 Yes -24.0 48.8 -4.76 1 -0.266 1.29 1.30 0.0179
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²Warmglow, ³Weight[,1], ⁴Height[,1],
# ⁵Intervention_months_factor, ⁶.std.resid
Code
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
cooksd = cooks.distance (WG_preF)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# infleunce plot
car:: influencePlot (WG_preF,col= "red" ,id.n= 3 )
StudRes Hat CookD
275 -1.465201 0.012464664 0.003020593
339 1.203187 0.015112249 0.002036705
386 -1.550097 0.004144052 0.001205968
405 -1.552705 0.003995963 0.001169521
5873 1.301589 0.017863410 0.003021551
Code
car:: influenceIndexPlot (WG_preF, id.n= 3 )
Code
#multicollinearity
car:: vif (WG_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.066877 1 1.032897
Weight 1.207036 1 1.098652
Height 1.139041 1 1.067259
Intervention_months_factor 1.002697 4 1.000337
Postmenopausal females:
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot (WG_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (WG_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 13
.rowna…¹ Warmg…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .std.…⁶ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1032 Yes 2.98 57.8 -2.76 1 0.0422 1.16 1.18 0.0300
2 2675 No 11.0 55.8 1.24 0 0.273 -1.30 -1.31 0.0239
3 5303 No 25.0 38.8 -7.76 2 0.425 -1.36 -1.38 0.0188
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²Warmglow, ³Weight[,1], ⁴Height[,1],
# ⁵Intervention_months_factor, ⁶.std.resid
Code
#Cook's distance
cooksd = cooks.distance (WG_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# infleunce plot
car:: influencePlot (WG_postF,col= "red" ,id.n= 3 )
StudRes Hat CookD
1032 1.1722690 0.030003491 0.003821410
2675 -1.3078974 0.023885123 0.004119193
4404 0.7155689 0.043294905 0.001676371
5494 -1.6515737 0.004852339 0.001766253
6518 -1.6384692 0.004893419 0.001730525
Code
car:: influenceIndexPlot (WG_postF, id.n= 3 )
Code
#multicollinearity
car:: vif (WG_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.036025 1 1.017853
Weight 1.159329 1 1.076721
Height 1.180711 1 1.086605
Intervention_months_factor 1.018630 4 1.002310
Code
### OUTLIERS
#Cook's distance
#plot 3largest
plot (WG_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (WG_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals not above 3, does not deserve closer attention
# A tibble: 3 × 13
.rown…¹ Warmg…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .std.…⁶ .hat
<chr> <fct> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 897 No -9.02 66.8 23.2 2 0.720 -1.49 -1.50 0.00889
2 1294 No -9.02 -3.19 -56.8 2 0.0864 -1.21 -1.23 0.0329
3 1510 No 25.0 41.8 -6.76 2 1.19 -1.71 -1.71 0.00557
# … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²Warmglow, ³Weight[,1], ⁴Height[,1],
# ⁵Intervention_months_factor, ⁶.std.resid
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = Warmglow), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # see zero influential data points
# A tibble: 0 × 13
# … with 13 variables: .rownames <chr>, Warmglow <fct>, Age <dbl[,1]>,
# Weight <dbl[,1]>, Height <dbl[,1]>, Intervention_months_factor <fct>,
# .fitted <dbl>, .resid <dbl>, .std.resid <dbl>, .hat <dbl>, .sigma <dbl>,
# .cooksd <dbl>, index <int>
Code
#Cook's distance
cooksd = cooks.distance (WG_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations exceed this threshold (* 3, see Stevens, 2002)
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# infleunce plot
car:: influencePlot (WG_M,col= "red" ,id.n= 3 )
StudRes Hat CookD
897 -1.500435 0.008894675 0.0023248247
1294 -1.229454 0.032856980 0.0047869881
1510 -1.712966 0.005574479 0.0023231165
5998 0.853050 0.011983516 0.0006676027
8023 -1.704294 0.002536407 0.0010373587
Code
car:: influenceIndexPlot (WG_M, id.n= 3 )
Code
#multicollinearity
car:: vif (WG_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.086787 1 1.042491
Weight 1.263387 1 1.124005
Height 1.249615 1 1.117862
Intervention_months_factor 1.005663 4 1.000706
Fatigue
Predict by ferritin
Code
res <- cor.test (data_scale$ CIS_Totalmean, data_scale$ Ferritin, method = "pearson" , use = "complete.obs" )
ggplot (data_scale, mapping = aes (x = CIS_Totalmean, y = Ferritin)) +
geom_point () +
theme_bw () +
geom_text (x = 120 , y = 1000 , label= paste ('r(' ,res$ parameter,') = ' , round (res$ estimate,3 ),', p = ' , round (res$ p.value, 3 ), sep = '' )) +
labs (x= "Fatigue, higher is worse" , y= "Ferritine" )
Code
# plot for cutoffs
data_scale <- data_scale %>% mutate (Fergroup = factor (case_when (Ferritin < 15 ~ 1 , Ferritin >= 15 & Ferritin <= 30 ~ 2 , Ferritin > 30 ~ 3 )) %>% fct_recode ("Ferritin < 15" = "1" , "Ferritin 15-30" = "2" , "Ferritin > 30" = "3" ))
ggplot (data_scale, aes (x= Fergroup, y= CIS_Totalmean)) +
geom_boxplot () +
theme_bw () +
labs (x= "Ferritin cut-off groups" , y= "Fatigue, higher is worse" )
Models
Code
CIS_preF <- lm (CIS_Totalmean ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CIS_postF <- lm (CIS_Totalmean ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
CIS_M <- lm (CIS_Totalmean ~ Age + Weight + Height + Intervention_months_factor, data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (CIS_preF, CIS_postF, CIS_M, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Fatigue, higher score is worse, show.reflvl = T, digits = 3" )
Fatigue, higher score is worse, show.reflvl = T, digits = 3
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
83.46
82.81 – 84.11
<0.001
84.63
83.76 – 85.50
<0.001
85.17
84.81 – 85.53
<0.001
Age
0.09
0.05 – 0.12
<0.001
0.07
0.02 – 0.12
0.003
0.09
0.08 – 0.10
<0.001
Weight
0.01
-0.01 – 0.03
0.460
0.02
-0.01 – 0.05
0.201
0.02
0.00 – 0.04
0.039
Height
-0.02
-0.07 – 0.02
0.275
-0.07
-0.12 – -0.01
0.019
-0.00
-0.03 – 0.03
0.861
Intervention_months_factor1
-0.32
-1.05 – 0.41
0.392
-0.16
-1.11 – 0.78
0.737
-0.13
-0.74 – 0.47
0.665
Intervention_months_factor2
0.66
-0.07 – 1.39
0.077
0.18
-0.71 – 1.08
0.684
-0.64
-1.21 – -0.06
0.030
Intervention_months_factor3
-0.52
-1.43 – 0.40
0.266
0.14
-1.26 – 1.54
0.845
-0.86
-1.67 – -0.06
0.035
Intervention_months_factor4
-0.57
-1.35 – 0.22
0.160
-0.33
-1.48 – 0.82
0.574
-0.38
-1.00 – 0.23
0.224
Observations
2360
1471
3558
R2 / R2 adjusted
0.016 / 0.013
0.012 / 0.007
0.054 / 0.052
Assumptions
Premenopausal females:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity good, constant variance (heteroscedasticity)
plot (CIS_preF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot (also known as spread-location plot)
plot (CIS_preF, which = 3 ) # Linearity good, constant variance
Code
# formal test in the form of Breusch-Pagan
bptest (CIS_preF) # The constant variance assumption is not violated.
studentized Breusch-Pagan test
data: CIS_preF
BP = 11.086, df = 7, p-value = 0.1349
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CIS_preF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CIS_preF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CIS_preF)
W = 0.98288, p-value = 2.994e-16
Code
### OUTLIERS
#plot 3largest
plot (CIS_preF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CIS_preF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CIS_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3740 122 -8.02 -2.19 -6.76 3 82.4 39.6 0.00475 6.28
2 6127 60 -15.0 31.8 -0.760 1 82.1 -22.1 0.00764 6.31
3 7695 107 -23.0 -9.19 -6.76 3 81.0 26.0 0.00504 6.31
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CIS_Totalmean, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CIS_Totalmean), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 3 ) # some influential data points
# A tibble: 15 × 13
.rown…¹ CIS_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 3006 61 -13.0 -17.2 -6.76 4 81.8 -20.8 0.00334 6.32
2 3213 64 -2.02 8.81 -13.8 0 83.7 -19.7 0.00386 6.32
3 3514 61 -17.0 -15.2 -11.8 4 81.6 -20.6 0.00358 6.32
4 3740 122 -8.02 -2.19 -6.76 3 82.4 39.6 0.00475 6.28
5 4126 58 -10.0 1.81 1.24 0 82.6 -24.6 0.00167 6.31
6 5337 64 -3.02 -18.2 -11.8 0 83.3 -19.3 0.00268 6.32
7 5578 62 -17.0 -9.19 -3.76 4 81.4 -19.4 0.00318 6.32
8 5674 101 -23.0 8.81 2.24 0 81.5 19.5 0.00313 6.32
9 6127 60 -15.0 31.8 -0.760 1 82.1 -22.1 0.00764 6.31
10 6282 104 -14.0 -18.2 -1.76 2 82.8 21.2 0.00321 6.32
11 6357 61 -24.0 -17.2 -7.76 2 82.1 -21.1 0.00344 6.32
12 6859 55 -16.0 -18.2 -8.76 3 81.6 -26.6 0.00475 6.31
13 7026 58 -6.02 -13.2 -8.76 0 83.0 -25.0 0.00171 6.31
14 7239 59 -24.0 -24.2 -11.8 0 81.4 -22.4 0.00240 6.31
15 7695 107 -23.0 -9.19 -6.76 3 81.0 26.0 0.00504 6.31
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CIS_Totalmean, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CIS_preF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
# check log transform to pass normality.
data_scale2 <- data_scale
table (data_scale2$ CIS_Totalmean)
33 50 55 58
1 1 1 2
59 60 61 62
3 1 4 5
63 63.1578947368421 64 65
1 1 10 10
65.4545454545455 65.5555555555556 66 67
1 1 20 10
67.2727272727273 67.3684210526316 68 68.4210526315789
1 1 33 2
69 69.4736842105263 70 70.5263157894737
47 1 30 2
71 71.5789473684211 72 72.6315789473684
60 5 70 2
73 73.6842105263158 74 74.1176470588235
85 10 114 1
74.7368421052632 75 75.2941176470588 75.7894736842105
6 140 1 5
76 76.8421052631579 77 77.7777777777778
162 7 186 1
77.8947368421053 78 78.8235294117647 78.8888888888889
14 238 1 1
78.9473684210526 79 80 81
11 246 301 342
81.0526315789474 81.1111111111111 81.1764705882353 81.25
9 1 1 1
82 82.1052631578947 82.2222222222222 83
362 27 1 467
83.1578947368421 83.3333333333333 83.5294117647059 84
40 1 1 489
84.2105263157895 84.4444444444444 84.7058823529412 85
25 2 1 525
85.2631578947368 86 86.3157894736842 86.6666666666667
30 773 20 5
87 87.3684210526316 87.5 88
542 27 1 517
88.421052631579 88.8888888888889 89 89.4736842105263
12 3 398 17
90 90.5263157894737 90.5882352941177 91
299 22 1 249
91.1111111111111 91.578947368421 92 92.2222222222222
2 6 217 1
92.6315789473684 93 93.3333333333333 93.6842105263158
9 152 2 9
94 94.1176470588235 94.7368421052632 95
97 1 6 96
95.5555555555556 95.7894736842105 96 96.6666666666667
1 1 64 1
96.8421052631579 97 97.8947368421053 98
7 35 6 38
98.8888888888889 98.9473684210526 99 100
1 2 29 8
101 101.052631578947 102 102.105263157895
16 3 6 1
103 104 104.210526315789 105
7 4 1 1
106 107 108 108.421052631579
4 3 3 2
110 110.526315789474 112.631578947368 114
2 1 1 3
115 116 116.842105263158 117
2 4 1 2
119 121 122 123
1 1 1 1
125 128 128.888888888889 131
1 2 1 1
140
1
Code
data_scale2$ CIS_Totalmean <- log10 (data_scale$ CIS_Totalmean)
table (data_scale2$ CIS_Totalmean)
1.51851393987789 1.69897000433602 1.74036268949424 1.76342799356294
1 1 1 2
1.77085201164214 1.77815125038364 1.78532983501077 1.79239168949825
3 1 4 5
1.79934054945358 1.8004276450948 1.80617997398389 1.81291335664286
1 1 10 10
1.81593981127304 1.81660950220282 1.81954393554187 1.82607480270083
1 1 20 10
1.82783903457275 1.82845636869504 1.83250891270624 1.83518975135401
1 1 33 2
1.83884909073726 1.84182033025302 1.84509804001426 1.84835119741198
47 1 30 2
1.85125834871908 1.85478530741739 1.85733249643127 1.86112548544841
60 5 70 2
1.86332286012046 1.86737443472541 1.86923171973098 1.86992162373929
85 10 114 1
1.87353474343023 1.8750612633917 1.87676104826959 1.87960889114242
6 140 1 5
1.88081359228079 1.88559925483161 1.88649072517248 1.89085553057493
162 7 186 1
1.89150811444213 1.89209460269048 1.89665587698653 1.89701583927975
14 238 1 1
1.89733765810285 1.89762709129044 1.90308998699194 1.90848501887865
11 246 301 342
1.90876711988363 1.90908035068113 1.90943016502296 1.90982336965091
9 1 1 1
1.91381385238372 1.91437099740163 1.91498921029165 1.91907809237607
362 27 1 467
1.91990348600159 1.92081875395238 1.92183942300478 1.92427928606188
40 1 1 489
1.9253663817031 1.92657108284147 1.92791357071698 1.92941892571429
25 2 1 525
1.9307614135898 1.93449845124357 1.93609024709487 1.93785209325116
30 773 20 5
1.93951925261862 1.94135448708723 1.94200805302231 1.94448267215017
542 27 1 517
1.94655568077303 1.94884747755262 1.94939000664491 1.95169532042545
12 3 398 17
1.95424250943932 1.95677484595472 1.95707179945819 1.95904139232109
299 22 1 249
1.95957134294439 1.96179564732977 1.96378782734556 1.96483558293675
2 6 217 1
1.96675906686132 1.96848294855394 1.97003677662256 1.97166640135606
9 152 2 9
1.9731278535997 1.97367106127765 1.97651890415048 1.97772360528885
97 1 6 96
1.98025594180424 1.98131778703225 1.98227123303957 1.98527674317929
1 1 64 1
1.98606422205671 1.98677173426624 1.99075934326509 1.99122607569249
7 35 6 38
1.99514749720559 1.99540424831085 1.99563519459755 2
1 2 29 8
2.00432137378264 2.00454762775072 2.00860017176192 2.0090481289774
16 3 6 1
2.01283722470517 2.01703333929878 2.0179115893087 2.02118929906994
7 4 1 1
2.02530586526477 2.02938377768521 2.03342375548695 2.03511361941632
4 3 3 2
2.04139268515822 2.04346569378109 2.05166017239636 2.05690485133647
2 1 1 3
2.06069784035361 2.06445798922692 2.06759937349781 2.06818586174616
2 4 1 2
2.07554696139253 2.08278537031645 2.08635983067475 2.0899051114394
1 1 1 1
2.09691001300806 2.10720996964787 2.11021547978759 2.11727129565576
1 2 1 1
2.14612803567824
1
Code
#analaysis with log transformed
CIS_preF_factor_fix <- lm (CIS_Totalmean~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 0 )
qqnorm (resid (CIS_preF_factor_fix), col = "grey" )
qqline (resid (CIS_preF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CIS_preF_factor_fix))
Shapiro-Wilk normality test
data: resid(CIS_preF_factor_fix)
W = 0.97017, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CIS_preF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.066081 1 1.032512
Weight 1.204323 1 1.097417
Height 1.137514 1 1.066543
Intervention_months_factor 1.003466 4 1.000433
Postmenopausal females: *
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Linearity ok, constant variance looks ok(heteroscedasticity)
plot (CIS_postF, which = 1 )
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CIS_postF, which = 3 ) # constant variance ok
Code
# formal test in the form of Breusch-Pagan
bptest (CIS_postF) # we fail to reject the null of homoscedasticity. The constant variance assumption is not violated.
studentized Breusch-Pagan test
data: CIS_postF
BP = 11.319, df = 7, p-value = 0.1253
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CIS_postF, 2 ) #suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CIS_postF)) #fail to reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CIS_postF)
W = 0.94034, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (CIS_postF, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CIS_postF) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CIS_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1208 140 12.0 6.81 -13.8 1 86.4 53.6 0.00638 6.33
2 6761 123 9.98 -2.19 -13.8 4 85.9 37.1 0.00833 6.41
3 7896 128 6.98 -18.2 -4.76 4 84.8 43.2 0.00865 6.39
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CIS_Totalmean, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CIS_Totalmean), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 8 × 13
.rowna…¹ CIS_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1208 140 12.0 6.81 -13.8 1 86.4 53.6 0.00638 6.33
2 1606 116 11.0 -0.189 -10.8 3 86.3 29.7 0.0116 6.44
3 1881 116 19.0 4.81 -8.76 2 86.9 29.1 0.00466 6.44
4 2120 59 9.98 -13.2 -6.76 0 85.6 -26.6 0.00200 6.45
5 6229 115 4.98 -18.2 -16.8 0 85.8 29.2 0.00487 6.44
6 6761 123 9.98 -2.19 -13.8 4 85.9 37.1 0.00833 6.41
7 7035 116 22.0 -13.2 1.24 1 85.8 30.2 0.00775 6.44
8 7896 128 6.98 -18.2 -4.76 4 84.8 43.2 0.00865 6.39
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CIS_Totalmean, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CIS_postF)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
#analaysis with log transformed
CIS_postF_factor_fix <- lm (CIS_Totalmean~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (CIS_postF_factor_fix), col = "grey" )
qqline (resid (CIS_postF_factor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CIS_postF_factor_fix))
Shapiro-Wilk normality test
data: resid(CIS_postF_factor_fix)
W = 0.95649, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CIS_postF) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.035449 1 1.017570
Weight 1.149008 1 1.071918
Height 1.173224 1 1.083155
Intervention_months_factor 1.012342 4 1.001534
Males:
Code
### LINEARITY (Linearity and constant variance assumptions)
#Fitted versus Residuals Plot: Good
plot (CIS_M, which = 1 ) # Linearity ok, constant variance looks ok(heteroscedasticity)
Code
### HOMOGEINITY OF VARIANCE
# Display scale-location plot, also known as the spread-location plot
plot (CIS_M, which = 3 ) # Linearity ok, constant variance looks ok(heteroscedasticity)
Code
# formal test in the form of Breusch-Pagan
bptest (CIS_M)# small p-value, so we reject the null of homoscedasticity. The constant variance assumption is violated.
studentized Breusch-Pagan test
data: CIS_M
BP = 16.758, df = 7, p-value = 0.01903
Code
### NORMALITY OF RESIDUALS
#QQ plot
plot (CIS_M, 2 ) # suspect
Code
# formal test in the form of Shapiro-Wilk
shapiro.test (resid (CIS_M)) #reject for the data sampled from normal
Shapiro-Wilk normality test
data: resid(CIS_M)
W = 0.9592, p-value < 2.2e-16
Code
### OUTLIERS
#plot 3largest
plot (CIS_M, which = 4 , id.n = 3 )
Code
# inspect the largest
model.data <- augment (CIS_M) %>%
mutate (index = 1 : n ())
model.data %>% top_n (3 , .cooksd) # residuals are above 3, does deserve closer attention
# A tibble: 3 × 13
.rowna…¹ CIS_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 1006 128 26.0 35.8 3.24 1 88.1 39.9 0.00424 6.12
2 1603 125 12.0 6.81 17.2 3 85.5 39.5 0.00488 6.12
3 5561 33 29.0 -7.19 -5.76 4 87.3 -54.3 0.00387 6.09
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CIS_Totalmean, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
# Plot standardized residuals
ggplot (model.data, aes (index, .std.resid)) +
geom_point (aes (color = CIS_Totalmean), alpha = .5 ) +
theme_bw ()
Code
# Filter potential influential data point
model.data %>%
filter (abs (.std.resid) > 4 ) # some influential data points
# A tibble: 12 × 13
.rown…¹ CIS_T…² Age[,1] Weigh…³ Heigh…⁴ Inter…⁵ .fitted .resid .hat .sigma
<chr> <dbl> <dbl> <dbl> <dbl> <fct> <dbl> <dbl> <dbl> <dbl>
1 697 129. 12.0 14.8 11.2 0 86.5 42.4 8.28e-4 6.11
2 1006 128 26.0 35.8 3.24 1 88.1 39.9 4.24e-3 6.12
3 1089 116 22.0 19.8 9.24 1 87.4 28.6 2.55e-3 6.13
4 1603 125 12.0 6.81 17.2 3 85.5 39.5 4.88e-3 6.12
5 1807 117 16.0 -8.19 -8.76 2 85.8 31.2 3.24e-3 6.13
6 2028 114 26.0 -9.19 -8.76 0 87.4 26.6 2.44e-3 6.14
7 3288 119 22.0 24.8 6.24 0 87.6 31.4 1.47e-3 6.13
8 4216 121 14.0 0.811 5.24 4 86.1 34.9 2.36e-3 6.13
9 4969 117 2.98 29.8 1.24 1 85.9 31.1 3.74e-3 6.13
10 5561 33 29.0 -7.19 -5.76 4 87.3 -54.3 3.87e-3 6.09
11 7370 50 0.985 11.8 8.24 1 85.3 -35.3 1.96e-3 6.13
12 8028 114 0.985 12.8 -6.76 0 85.5 28.5 2.27e-3 6.14
# … with 3 more variables: .cooksd <dbl>, .std.resid <dbl>, index <int>, and
# abbreviated variable names ¹.rownames, ²CIS_Totalmean, ³Weight[,1],
# ⁴Height[,1], ⁵Intervention_months_factor
Code
#Cook's distance
cooksd = cooks.distance (CIS_M)
# Plot Cook's Distance with a horizontal line at K+1/n to see which observations
#exceed this threshold
n <- nrow (data_scale)
plot (cooksd, main = "Cooks Distance for Influential Obs" )
abline (h = (8 / n) * 3 , lty = 2 , col = "steelblue" ) # add cutoff line
Code
#analaysis with log transformed
CISMfactor_fix <- lm (CIS_Totalmean~ Age + Weight + Height + as.factor (Intervention_months_factor),
data = data_scale2,
subset = data_scale2$ Gender == "Female" & data_scale2$ PostMeno == 1 )
qqnorm (resid (CISMfactor_fix), col = "grey" )
qqline (resid (CISMfactor_fix), col = "dodgerblue" , lwd = 2 )
Code
shapiro.test (resid (CISMfactor_fix))
Shapiro-Wilk normality test
data: resid(CISMfactor_fix)
W = 0.95649, p-value < 2.2e-16
Code
## Transforming does not seem to help. Many outliers. Solution -> robust regression, see chunk below.
#multicollinearity
car:: vif (CIS_M) # no VIF value that exceeds 5 or 10
GVIF Df GVIF^(1/(2*Df))
Age 1.092206 1 1.045087
Weight 1.272488 1 1.128046
Height 1.251062 1 1.118509
Intervention_months_factor 1.007520 4 1.000937
::: {.callout-note appearance=“default”} ### Decision
We conclude that the assumptions were violated and thus we use robust models. :::
Robust models
Code
CIS_preF_robust <- lmrob (CIS_Totalmean ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 0 )
CIS_postF_robust <- lmrob (CIS_Totalmean ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Female" & data_scale$ PostMeno == 1 )
CIS_M_robust <- lmrob (CIS_Totalmean ~ Age + Weight + Height + as.factor (Intervention_months_factor) , data = data_scale, subset = data_scale$ Gender == "Male" )
pl <- c (
` (Intercept) ` = "Intercept" ,
` as.factor(Intervention_months_factor)1 ` = "6-12 months" ,
` as.factor(Intervention_months_factor)2 ` = "12-18 months" ,
` as.factor(Intervention_months_factor)3 ` = "18-24 months" ,
` as.factor(Intervention_months_factor)4 ` = "24+ months"
)
tab_model (CIS_preF_robust, CIS_postF_robust, CIS_M_robust, pred.labels = pl, dv.labels = c ("Pre-menopausal women" , "Post-menopausal women" , "Men" ), title = "Fatigue, higher score is worse (robust regression)" , show.reflvl = T, digits = 3 )
Fatigue, higher score is worse (robust regression)
Pre-menopausal women
Post-menopausal women
Men
Predictors
Estimates
CI
p
Estimates
CI
p
Estimates
CI
p
Intercept
83.728
83.133 – 84.323
<0.001
84.805
84.045 – 85.565
<0.001
85.311
84.982 – 85.641
<0.001
Age
0.087
0.053 – 0.121
<0.001
0.073
0.029 – 0.117
0.001
0.080
0.067 – 0.092
<0.001
Weight
0.005
-0.019 – 0.029
0.701
0.014
-0.011 – 0.039
0.273
0.012
-0.007 – 0.031
0.217
Height
-0.022
-0.066 – 0.022
0.321
-0.051
-0.100 – -0.002
0.042
0.001
-0.027 – 0.029
0.945
6-12 months
-0.332
-1.051 – 0.387
0.366
-0.290
-1.094 – 0.515
0.480
-0.403
-0.921 – 0.116
0.128
12-18 months
0.707
-0.030 – 1.444
0.060
0.324
-0.447 – 1.094
0.410
-0.502
-1.015 – 0.011
0.055
18-24 months
-0.855
-1.751 – 0.040
0.061
-0.158
-1.313 – 0.996
0.788
-0.784
-1.610 – 0.042
0.063
24+ months
-0.377
-1.172 – 0.418
0.352
-0.491
-1.536 – 0.554
0.357
-0.271
-0.820 – 0.278
0.334
Observations
2360
1471
3558
R2 / R2 adjusted
0.019 / 0.016
0.015 / 0.010
0.055 / 0.053